// bit map arena virtual memory allocator // using one bit of overhead per block // When releasing memory, you must supply the block size. Under // modern programming discipline, this should be available since // you generally have to check OBJECT SIZE FOR OVERFLOW anyway. // A standard malloc and free routine is conditionally supplied // that remembers the block sizes if this is a problem. // please report bugs located to the program author, // karl_m@acm.org www.geocities.com/malbrain // n.b. the semantics of valloc differ slightly from the standard // unix V R4 definition: the memory allocated is aligned to // the blocksize of the appropriate templated arena, not // necessarily to the virtual memory system page size. // Also, valloc requests must be released with vfree. // That said, with the default templates given below, // valloc requests for 4K or more will align // on a page boundary. #include #include #define NDEBUG #include "assert.h" #ifdef unix #include #else #define WIN32_LEAN_AND_MEAN #include #endif // each memory arena structure is followed by a bit map // allocation table made of unsigned ints where the low // order bit represents the lowest block number. typedef struct Arena_ { char *mem, *end; // beginning and end of arena allocation void *next; // next arena in chain with same template unsigned blksize; // block size for memory allocation unsigned mapmax; // map size in ints immediately following unsigned blkmax; // maximum block allocation in bytes unsigned avail; // amount of available arena space unsigned scan; // current scan offset } Arena; // the Arenas are arrayed with their bit maps into page frames; // the first structure in the page frame describes the page // new arenas are allocated from the end typedef struct Page_ { unsigned first; // offset of lowest arena allocated unsigned size; // size of this page in bytes void *prev; // previous page of arenas in chain } Page; #define MAP_BITS (CHAR_BIT * sizeof(unsigned)) #define FRAME_SIZE 8192 Page *PageLIFO; // allocate and initialize a new page frame for arenas Page *vframe (void) { unsigned xtra, size = FRAME_SIZE; Page *page; #ifdef unix page = (Page *)sbrk (size); // round brk value to page boundary if( xtra = (unsigned)page & 0xfff ) sbrk (0x1000 - xtra), size += 0x1000 - xtra; #else page = (Page *)VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); #endif page->first = page->size = size; page->prev = PageLIFO; PageLIFO = page; return page; } // arena templates for three arena shapes, but feel // free to add others to suit your own needs. // overlap between blkmax and blksize can reduce // the percentage of granularity induced waste, // as illustrated in the 16 byte and 128 byte // templates below. typedef struct { unsigned blksize; // allocation granularity unsigned blkcnt; // number of blocks in the arena unsigned blkmax; // maximum allocation to support Arena *arena; // chain of arenas with template Arena *scan; // current arena being allocated } Template; Template ArenaDefs[] = { {16, 8192, 511 }, // 16 x 8K = 512K total {128, 4096, 4095 }, // 128 x 4K = 512K total {4096, 512, 65535 }, // 4K x 512 = 2MB total {0} // 64K and above }; // allocate and initialize a new arena // marking an initial allocation void *varena (size_t bytes, Template *tmpl) { unsigned xtra, blks, bits, blkmax, blksize, mapmax, mapbytes; unsigned *map; Arena *arena; Page *page; assert (bytes > 0); // build the arena from a table template, // or make a huge arena with two blocks // of one half the total request each if( blksize = tmpl->blksize ) { blkmax = tmpl->blkmax; blks = tmpl->blkcnt; } else { if( xtra = bytes & 0xfff ) // round for 4K virtual memory bytes += 4096 - xtra; assert (~bytes & 1); blksize = bytes / 2; blkmax = bytes - 1; blks = 2; } // block count for the initial allocation assert (blksize > 0); bits = (bytes + blksize - 1); bits /= blksize; // round mapmax up to unsigned size multiple // unless doing a huge block without map bits assert (MAP_BITS > 2); if( mapmax = blks / MAP_BITS ) if( blks & (MAP_BITS - 1) ) mapmax++; mapbytes = mapmax * sizeof(unsigned); // allocate new page on overflow of current frame // or on startup if( page = PageLIFO ) { if( page->first < mapbytes + sizeof(Page) + sizeof(Arena) ) page = vframe(); } else page = vframe(); // allocate the arena in the frame page->first -= mapbytes; map = (unsigned *)((char *)page + page->first); page->first -= sizeof(Arena); arena = (Arena *)((char *)page + page->first); assert (page->first >= sizeof(Page)); // initialize and insert arena into template chain memset (arena, 0, sizeof(Arena)); arena->avail = blks * blksize; arena->blksize = blksize; arena->mapmax = mapmax; arena->blkmax = blkmax; arena->next = tmpl->arena; tmpl->arena = arena; if( !arena->next ) tmpl->scan = arena; // allocate the arena's memory block #ifdef unix arena->mem = sbrk (arena->avail); // align allocation on page boundary if( xtra = (unsigned)arena->mem & 0xfff ) sbrk (0x1000 - xtra), arena->mem += 0x1000 - xtra; #else arena->mem = VirtualAlloc(NULL, arena->avail, MEM_COMMIT, PAGE_READWRITE); #endif arena->end = arena->mem + arena->avail; arena->avail -= bits * blksize; if( mapbytes * CHAR_BIT < bits ) return arena->mem; // clear the initial allocation of bits in the map while( bits >= MAP_BITS ) *map++ = 0, bits -= MAP_BITS, mapmax--; // and mark the rest of the map as available if( mapmax-- ) *map++ = ~0U << bits; else return arena->mem; while( mapmax-- ) *map++ = ~0U; return arena->mem; } // mark a partial sub-byte map allocation of 7 or fewer blocks void *vmarkbyte (Arena *arena, unsigned char val, unsigned blks, int bit) { unsigned *map = (unsigned *)(arena + 1) + arena->scan; unsigned char mask = UCHAR_MAX >> (CHAR_BIT - blks); unsigned block; assert (blks && blks < CHAR_BIT); assert (val); // find the available run of ones in the byte // ignoring the rightmost bit, and assuming // the map byte is non zero do val >>= 1, bit++, assert (bit < MAP_BITS); while( val & mask ^ mask ); // clear the allocation bits arena->avail -= blks * arena->blksize; *map ^= (unsigned)mask << bit; assert(!(*map & (unsigned)mask << bit)); block = arena->scan * MAP_BITS + bit; return arena->mem + block * arena->blksize; } // allocate consecutive run of blocks from the map // blks must be at least one void *vmarkmap (Arena *arena, unsigned run, unsigned blks, unsigned bit) { unsigned *map = (unsigned *)(arena + 1) + arena->scan, mask; char *mem = arena->mem; unsigned block; block = arena->scan * MAP_BITS + bit - run; arena->avail -= blks * arena->blksize; mem += block * arena->blksize; // clear initial bits of hightest map allocation blks += bit - run; assert (blks > 0); mask = ~0U >> MAP_BITS - blks; if( run < bit ) { bit -= run; *map ^= mask ^= ((1 << bit) - 1); assert (!(*map & mask)); return mem; } *map-- ^= mask; assert (!(map[1] & mask)); run -= bit; // clear preceeding run bits from map while( run >= MAP_BITS ) assert (*map == ~0U), *map-- = 0, run -= MAP_BITS; assert ((*map | ~0U >> run) == ~0U); *map &= ~0U >> run; return mem; } // scan tables built for contents of map bytes #if CHAR_BIT != 8 // you'll have to construct your own tables #else // new run value: consecutive high order bits // to the right of pretested high order bit // (table also removes low order bit) unsigned char ArenaRun[64] = { 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 - 0x8f 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 - 0x9f 1, 1, 1, 1, 1, 1, 1, 1, // 0xA0 - 0xAf 1, 1, 1, 1, 1, 1, 1, 1, // 0xB0 - 0xBf 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0 - 0xCf 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0 - 0xDf 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0 - 0xEf 4, 4, 4, 4, 5, 5, 6, 7, // 0xF0 - 0xFf }; // available bits to continue existing run of consecutive blocks // (entries w/high order bit set are folded onto these values) // (low order bit is removed, and 0xFF is also removed) unsigned char ArenaGlom[64] = { 1, 2, 1, 3, 1, 2, 1, 4, // 0x00 - 0x0f 1, 2, 1, 3, 1, 2, 1, 5, // 0x10 - 0x1f 1, 2, 1, 3, 1, 2, 1, 4, // 0x20 - 0x2f 1, 2, 1, 3, 1, 2, 1, 6, // 0x30 - 0x3f 1, 2, 1, 3, 1, 2, 1, 4, // 0x40 - 0x4f 1, 2, 1, 3, 1, 2, 1, 5, // 0x50 - 0x5f 1, 2, 1, 3, 1, 2, 1, 4, // 0x60 - 0x6f 1, 2, 1, 3, 1, 2, 1, 7, // 0x70 - 0x7f }; // available bits remaining after first zero // (without redundant low order bit) unsigned char ArenaAfter[128] = { 0, 1, 1, 2, 1, 1, 2, 3, // 0x00 - 0x0f 1, 1, 1, 2, 2, 2, 3, 4, // 0x10 - 0x1f 1, 1, 1, 2, 1, 1, 2, 3, // 0x20 - 0x2f 2, 2, 2, 2, 3, 3, 4, 5, // 0x30 - 0x3f 1, 1, 1, 2, 1, 1, 2, 3, // 0x40 - 0x4f 1, 1, 1, 2, 2, 2, 3, 4, // 0x50 - 0x5f 2, 2, 2, 2, 2, 2, 2, 3, // 0x60 - 0x6f 3, 3, 3, 3, 4, 4, 5, 6, // 0x70 - 0x7f 1, 1, 1, 2, 1, 1, 2, 3, // 0x80 - 0x8f 1, 1, 1, 2, 2, 2, 3, 4, // 0x90 - 0x9f 1, 1, 1, 2, 1, 1, 2, 3, // 0xA0 - 0xAf 2, 2, 2, 2, 3, 3, 4, 5, // 0xB0 - 0xBf 2, 2, 2, 2, 2, 2, 2, 3, // 0xC0 - 0xCf 2, 2, 2, 2, 2, 2, 3, 4, // 0xD0 - 0xDf 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0 - 0xEf 4, 4, 4, 4, 5, 5, 6, 7, // 0xF0 - 0xFf }; #endif // scan an existing arena for available space // processing the map in byte size pieces // of int sized chunks using the tables void *vscan (Arena *arena, size_t bytes) { unsigned chunk, *map = (unsigned *)(arena + 1); unsigned run = 0, blks, bit; unsigned char nxt; blks = bytes + arena->blksize - 1; assert (arena->blksize > 0); if( blks /= arena->blksize ) do if( chunk = map[arena->scan] ) for( bit = 0; bit < MAP_BITS; bit += CHAR_BIT ) if( nxt = chunk >> bit ) // next byte of chunk if( nxt < UCHAR_MAX ) // less than 8 blocks available ??? if( ~nxt & 1 || run + ArenaGlom[nxt/2 & 0x3f] < blks ) if( ArenaAfter[nxt/2] < blks ) // bits available in byte if( nxt & 0x80 ) // establish new run run = ArenaRun[nxt/2 & 0x3f]; else run = 0; // no run possible without the leading bit else // request of fewer than 8 blocks will fit return vmarkbyte (arena, nxt, blks, bit); else // 8 blocks or a run spanning two or more map bytes return vmarkmap (arena, run, blks, bit); else if( run + CHAR_BIT < blks ) run += CHAR_BIT;// 8 more blocks still not enough else // or, run now fits return vmarkmap (arena, run, blks, bit); else run = 0; // byte is all zero bits else run = 0; // chunk is all zero bits while( ++arena->scan < arena->mapmax ); arena->scan = 0; // next time start scan from the beginning return NULL; } // allocate a new block of size bytes void *valloc (size_t bytes) { Template *tmpl = ArenaDefs; unsigned blkmax, doit = 2; Arena *start, *first; void *mem; // round request up to smallest Template blocksize assert (tmpl->blksize > 0); if( bytes < tmpl->blksize ) bytes = tmpl->blksize; // find a suitable template, or default to the // huge-block template at the end of the table while( blkmax = tmpl->blkmax ) if( bytes > blkmax ) tmpl++; else break; // scan existing arenas built under the template // for available space, going through each arena // once before giving up and building a new arena, // but do the current allocating arena twice if( first = tmpl->arena ) if( start = tmpl->scan ) do if( tmpl->scan == start && !doit-- ) break; else if( bytes < tmpl->scan->blksize ) continue; else if( bytes > tmpl->scan->avail ) continue; else if( tmpl->scan->mapmax ) // scan blocks with maps if( mem = vscan(tmpl->scan, bytes) ) return mem; else continue; else // huge arena, allocate by clearing available space return tmpl->scan->avail = 0, tmpl->scan->mem; while( tmpl->scan = tmpl->scan->next ? tmpl->scan->next : first ); // build a new arena and allocate space there return varena (bytes, tmpl); } // starting with block number, set the // map bits to make blocks available void vsetmap (Arena *arena, unsigned block, unsigned blks) { unsigned mask, *map = (unsigned *)(arena + 1) + block / MAP_BITS; unsigned max = arena->mapmax * MAP_BITS; assert (blks <= max - block); assert (block <= max); assert (blks > 0); if( block > max ) // ensure sanity of starting block no. block = max; if( blks > max - block ) // ditto for the block count blks = max - block; arena->avail += blks * arena->blksize; // set block available bits in lowest map entry of the run // (these are high order bits) if( blks < MAP_BITS ) // less than full int of bits? mask = ~0U >> (MAP_BITS - blks); else mask = ~0U; block &= MAP_BITS - 1; assert (!(*map & mask << block)); *map++ |= mask << block; // calculate number of blks remaining if( blks > MAP_BITS - block ) blks -= MAP_BITS - block; else return; // set all block available bits for intermediate map blocks while( blks > MAP_BITS ) assert (!*map), *map++ = ~0U, blks -= MAP_BITS; // set block available bits in highest map block // (these are low order bits) assert (!(*map & ~0U >> (MAP_BITS - blks))); *map |= ~0U >> (MAP_BITS - blks); } // release memory blocks into an arena void vfree (void *mem, size_t bytes) { unsigned block, entry, blks; Arena *arena; Page *page; // round request up to smallest Template blocksize assert (ArenaDefs->blksize > 0); if( bytes < ArenaDefs->blksize ) bytes = ArenaDefs->blksize; // locate the correct arena // by scanning all existing // arena memory ranges assert (PageLIFO); if( page = PageLIFO ) entry = page->first; else return; while( 1 ) if( entry < page->size ) if( arena = (Arena *)((char *)page + entry), (char *)mem < arena->mem ) entry += sizeof(Arena) + sizeof(unsigned) * arena->mapmax; else if( (char *)mem < arena->end ) break; else entry += sizeof(Arena) + sizeof(unsigned) * arena->mapmax; else if( assert(page->prev), page = page->prev ) entry = page->first; else return; // huge blocks have no map, so we just mark // the entire arena available and return if( !arena->mapmax ) { arena->avail = arena->blkmax + 1; return; } // calculate the number of blocks in the request assert (arena->blksize > 0); blks = bytes + arena->blksize - 1; blks /= arena->blksize; // calculate the starting block number // and set the available bits block = ((char *)mem - (char *)arena->mem) / arena->blksize; vsetmap (arena, block, blks); } // trim memory block to smaller new size // blks is oldsize, bytes is newsize void vtrim (void *mem, size_t blks, size_t bytes) { unsigned block, entry; Arena *arena; Page *page; assert (blks >= bytes); // round sizes up to smallest Template blocksize assert (ArenaDefs->blksize > 0); if( blks < ArenaDefs->blksize ) blks = ArenaDefs->blksize; if( bytes < ArenaDefs->blksize ) bytes = ArenaDefs->blksize; // locate the correct arena by scanning // all existing arena memory ranges assert (PageLIFO); if( page = PageLIFO ) entry = page->first; else return; while( 1 ) if( entry < page->size ) if( arena = (Arena *)((char *)page + entry), (char *)mem < arena->mem ) entry += sizeof(Arena) + sizeof(unsigned) * arena->mapmax; else if( (char *)mem < arena->end ) break; else entry += sizeof(Arena) + sizeof(unsigned) * arena->mapmax; else if( assert (page->prev), page = page->prev ) entry = page->first; else return; // huge blocks have no map; do nothing to resize these. if( !arena->mapmax ) return; // calculate the number of blocks in the new size assert (arena->blksize > 0); bytes += arena->blksize - 1; bytes /= arena->blksize; // calculate the number of blocks in the original block, the // starting block number, and the change in block counts block = ((char *)mem - (char *)arena->mem) / arena->blksize; blks += arena->blksize - 1; blks /= arena->blksize; // free extra blocks if( blks > bytes ) vsetmap (arena, block + bytes, blks - bytes); } #ifdef DEFINEMALLOC void *malloc (size_t bytes) { size_t *ans = valloc (bytes + sizeof(size_t)); *ans++ = bytes; return ans; } void free (void *mem) { size_t *size; if( size = (size_t *)mem ) size--, vfree (size, *size + sizeof(size_t)); } size_t msizeof (void *mem) { size_t *size; if( size = (size_t *)mem ) return *--size; return 0; } void *calloc (size_t ele, size_t num) { size_t xtra; void *ans; if( xtra = ele % sizeof(unsigned) - 1 ) ele += sizeof(unsigned) - xtra; ans = malloc (ele * num); memset (ans, 0, ele * num); return ans; } void *realloc (void *mem, size_t size) { size_t *old; void *ans; if( old = (size_t *)mem ) if( *--old >= size ) return vtrim (old, *old, size), *old = size, mem; ans = malloc (size); if( old ) memcpy (ans, mem, *old), vfree (old, *old + sizeof(size_t)); return ans; } #endif #ifdef ARENASTANDALONE #include FILE *Out = stdout; FILE *In = stdin; char *Buff[5000]; char Line[80000]; void blkaddrs (Arena *arena, unsigned idx) { char *mem = arena->mem + idx * MAP_BITS * arena->blksize; unsigned *map = (unsigned *)(arena + 1) + idx; unsigned bit; for( bit = 0; bit < MAP_BITS; bit++ ) if( !(*map >> bit & 1) ) fprintf (stderr, "block %x not freed\n", mem + bit * arena->blksize); } int main (int argc, char **argv) { int usemalloc = 0; int check = 0; int count = 0; int debug = 0; int len, idx; // process any program options present if( argc > 1 ) if( argv[1][0] == '-' ) for( argc--, argv++; *++(*argv); ) switch( **argv | 0x20 ) { case 'm': usemalloc++; break; case 'd': debug++; break; case 'c': check++; break; } // open input and output files if present if( argc > 1 ) if( !(In = fopen (argv[1], "r")) ) return 1; if( argc > 2 ) if( !(Out = fopen (argv[2], "w")) ) return 1; // scramble the input as the test plan while( fgets (Line, sizeof(Line), In) ) { if( count < 5000 ) idx = count++; else { idx = (unsigned)rand() % count; len = strlen(Buff[idx]); if( debug ) fprintf(stderr, "free %.6d %x\n", len + 1, Buff[idx]); fwrite (Buff[idx], len, 1, Out); usemalloc ? free (Buff[idx]) : vfree (Buff[idx], len + 1); } len = strlen (Line); Buff[idx] = usemalloc ? malloc (len + 1) : valloc (len + 1); if( debug ) fprintf(stderr, "req %.6d %x\n", len + 1, Buff[idx]); memcpy (Buff[idx], Line, len + 1); } while( count-- ) { len = strlen (Buff[count]); fwrite (Buff[count], len, 1, Out); usemalloc ? free (Buff[count]) : vfree (Buff[count], len + 1); if( debug ) fprintf(stderr, "free %.6d %x\n", len + 1, Buff[count]); } fclose (Out); fclose (In); // ensure all bits were reset in all arenas if( check ) { unsigned entry, idx; Arena *arena; Page *page; if( page = PageLIFO ) do for( entry = page->first; entry < page->size; entry += sizeof(Arena) + arena->mapmax * sizeof(unsigned) ) for( arena = (Arena *)((char *)page + entry), idx = 0; idx < arena->mapmax; idx++ ) if( ((unsigned *)(arena + 1))[idx] != ~0U ) blkaddrs(arena, idx); while( page = page->prev ); } } #endif