Is There a Better Way to do This?

closed account (N36fSL3A)
I've been writing a class that allocates large amounts of memory blocks to avoid calling 'new' and 'delete' (it's slow). I wrote this up in some spare time but I'm questioning it's efficiency, especially in the functions like 'Free', 'Allocate' and 'Combine'.

If there is a better way to do this please tell me, really need this code to be extremely fast.

I'm also looking for methods to avoid memory fragmentation, as that requires extra unneeded memory to be allocated.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class CF::Management::Allocator
{
	public:
		Allocator(CF::Uint64 size)
		{
			this->size = size;
		};

		void AllocateBlock()
		{
			CF::Uint8* block = nullptr;
			MemAddr address;

			try{block = new CF::Uint8[size];}

			catch(std::bad_alloc &ba)
			{
				throw CF::Error("CF::Management::Allocator::AllocateBlock() - Failure allocating new block.");
			}

			// Clear the memory allocated
			memset(block, 0, size);
			memset(&address, 0, sizeof(address));

			// Fill out the structure.
			address.addr  = (CF::UintPtr)static_cast<void*>(block);
			address.size  = size;  // Set the address size to the whole block since it's empty.
			address.block = block;

			// Push back the data.
			freeAddrs.push_back(address);
			blocks.push_back(block);
		}
		
		template<class T>
		void Allocate(T* ptr)
		{
			CF::Uint8* block = nullptr;   // The useable block
			size_t typeSize  = sizeof(T); // The size of the block
			bool space       = false;     // If there is space for the type or not

			// Check if the pointer type is larger than the block itself. If it is, throw an error.
			if(typeSize > size)
			{
				throw CF::Error("CF::Management::Allocator::Allocate() - the memory blocks aren't large enough.");
			}

			// Loop through the index of the free addresses
			for(size_t i = 0; i < freeAddrs.size(); i++)
			{
				MemAddr &adr = freeAddrs[i]; // Reference to the current block

				// If there's enough space to construct the object
				if(adr.size >= typeSize)
				{
					space = true; // Then let us know that there is space

					MemAddr tmp = adr;
					tmp.size    = typeSize;

					usedAddrs.push_back(tmp);

					// Give the pointer a bit of memory
					ptr   = adr.block[addr];

					// Move the address over
					adr.size -= typeSize;

					// If there is no more memory left in the free space then delete
					// it from the vector
					if(adr.size == 0) {freeAddrs.erase(freeAddrs.begin() + i);}

					break;
				}
			}

			// If there isn't any space in the currently allocated blocks
			if(!space)
			{
				// Allocate another block
				AllocateBlock();
				
				// Call this function again.
				Allocate(ptr);
			}
		}

		template<class T>
		void Free(T* ptr)
		{
			Uint8* ptr_u8 = static_cast<Uint8*>(ptr);

			// Loop through the memory blocks and check to see if this address is equal to the block
			for(size_t i = 0; i < usedAddrs.size(); i++)
			{
				for(size_t b = 0; b < size; b++)
				{
					if(ptr_u8 == usedAddrs[i][addr])
					{
						size_t next = i + 1;

						// Simply clear the memory
						memset(ptr_u8, 0, usedAddrs[i].numElements);

						if(next != freeAddrs.size())
						{
							// If the blocks don't have any interuptions between them
							if((freeAddrs[i].block + (freeAddrs[i].size * freeAddrs[i].numElements)) == freeAddrs[next].block)
							{
								MemAddr address  = freeAddrs[i];
								MemAddr cAddress = freeAddrs[next];

								cAddress.addr     = address.addr;
								cAddress.block    = address.block;
								cAddress.size    += address.size;

								freeAddrs.erase(freeAddrs.begin() + i);
								freeAddrs[i] = cAddress;
							}
						}
					}
				}
			}
		}

		void Combine()
		{
			for(size_t i = 0; i < freeAddrs.size(); i++)
			{
				size_t next = i + 1;

				if(next != freeAddrs.size())
				{
					// If the blocks don't have any interuptions between them
					if((freeAddrs[i].block + (freeAddrs[i].size * freeAddrs[i].numElements)) == freeAddrs[next].block)
					{
						MemAddr address  = freeAddrs[i];
						MemAddr cAddress = freeAddrs[next];

						cAddress.addr     = address.addr;
						cAddress.block    = address.block;
						cAddress.size    += address.size;

						freeAddrs.erase(freeAddrs.begin() + i);
						freeAddrs[i] = cAddress;
					}
				}
			}
		}

		~Allocator()
		{
			for(size_t i = 0; i < blocks.size(); i++)
			{
				delete[] blocks[i];
			}
		}

	private:
		struct MemAddr
		{
			CF::Uint8*  block;
			CF::UintPtr addr;
			CF::Uint64  size;
			CF::Uint64  numElements;
		};

		CF::Uint64 size;

		std::vector<MemAddr>    freeAddrs;
		std::vector<MemAddr>    usedAddrs;
		std::vector<CF::Uint8*> blocks;
};
What makes you believe that this allocator is more efficient (either in terms of space or in terms of time) than the default allocation functions? Have you measured anything?

> really need this code to be extremely fast.

Write a pool allocator, perhaps. Or better, use a debugged, tested, profiled, pool allocator.

For instance, Boost pool.
http://www.boost.org/doc/libs/1_55_0/libs/pool/doc/html/index.html

Or if you are using libstdc++, GNU's __gnu_cxx::__pool_alloc
http://gcc.gnu.org/onlinedocs/libstdc++/manual/bk01pt04ch11.html#allocator.custom
closed account (N36fSL3A)
Nope, but Framework told me for his job or whatever that he allocated large blocks of memory and constructed objects from them because 'new/malloc' and 'delete/free' were slow.

I'm trying to reduce the amount of dependencies that my engine uses as much as possible, and I heard that boost is pretty large, so I decided to just write my own.
Boost is large. But you don't use all of boost in every project. Using boost::pool won't cost you anything noticeable in size.
I suppose that I should mention this....

The speed of memory allocation and/or the chance of fragmentation occurring depends on several factors, like how big of clumps you're allocating, the lifetime of the allocated memory, and how frequently you are allocating.

Memory pools are good for when you will be doing rapid and frequent allocation/destruction of a single type.

That said... they're not really necessary for a lot of situations. Just because some other programmer said "we needed to implement memory pools because dynamic allocation was too slow".... you should not misinterpret that to mean "dynamic allocation is slow and you shouldn't do it".

By doing this you might be trying to solve a problem that doesn't exist. Normal allocation might be just fine for you.

Of course, exploring this area is great for academic purposes... but if your goal is on the finished product, I would not do this until you know dynamic allocation is causing performance issues in your program. Don't try to fix what isn't broken.
I agree with what has been said before. I also found it to be a fun project.

We made a memory pool in a C class I took, give me some time to type it out.

I don't think you have to zero the memory.

> Memory pools are good for when you will be doing rapid and frequent
> allocation/destruction of a single type.

The most dramatic improvements in performance with memory pools are typically seen when:

a. Many different types are involved (with one pool catering to objects which could belong to different types, but all of them having the same size). Heap fragmentation becomes a significant issue when a large number of allocations/deallocations of varying sizes are intermingled.

b. The memory footprint of the objects are small (in comparison to the chunk size that the pool uses). The classic trade-off between space-efficiency and time-efficiency.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include <iostream>
#include <ctime>
#include <boost/pool/pool_alloc.hpp>
#include <deque>
#include <memory>

template < std::size_t N > struct A { char filler[N] ; };

template < std::size_t N > struct B final
{
    A<N> a ;

    static void* operator new ( std::size_t ) { return pool.malloc() ; }
    static void operator delete ( void* p ) { pool.free(p) ; }

    private:
        struct big_chunk_allocator
        {
          using size_type = std::size_t ;
          using difference_type = std::ptrdiff_t ;

          static char* malloc( size_type sz ) { return new char[sz] ; }
          static void free( const char* p ) { delete [] p ; }
        };

        static boost::pool<big_chunk_allocator> pool ;
};

template < std::size_t N >
boost::pool< typename B<N>::big_chunk_allocator > B<N>::pool( sizeof( B<N> ) ) ;

template < typename T > void test( std::size_t n = 1024*1024 )
{
    const auto start = std::clock() ;
    {
        std::deque< std::unique_ptr<T> > seq(n/4) ;

        while( seq.size() < n )
        {
            const auto sz = seq.size() ;
            for( std::size_t i = 0 ; i < sz ; ++i ) seq.emplace_back( new T ) ;
            seq.erase( seq.begin(), seq.begin() + sz/4 ) ;
        }
    }

    const auto end = std::clock() ;
    std::cout << double(end-start) / CLOCKS_PER_SEC << " secs.\n" ;
}

template < std::size_t N > void do_test()
{
    static_assert( sizeof( A<N> ) == N, "check size" ) ;
    static_assert( sizeof( B<N> ) == N, "check size" ) ;

    std::cout << "size: " << N << "\n\tdefault allocation: " ;
    test< A<N> > () ;

    std::cout <<  "\t   pool allocation: " ;
    test< B<N> > () ;

    std::cout << '\n' ;
}

int main()
{
    std::cout << std::fixed ;
    std::cout.precision(3) ;

    do_test<12>() ;
    do_test<16>() ;
    do_test<24>() ;
    do_test<48>() ;
    do_test<64>() ;
    do_test<96>() ;
    do_test<128>() ;
}

http://coliru.stacked-crooked.com/a/9497eda6955f11b5

Note for Lumpkin: Before you actually start using Combine() (rght now you are not), you would need to make sure that the pointer returned by Allocate() meets the alignment requirements of the type.
Topic archived. No new replies allowed.