#include #include #include #include #include #include #include #define RANK_0_LOG 5 #define RANK_0_SIZE (1ul << RANK_0_LOG) #define RANKS 20 // NPER is an architecture-dependent number. // NPER is nodes per element of `nodes` #define NPERBITS 4 #define NPER (1ul << NPERBITS) // BPER is bits per element of `nodes` #define BPERBITS 1 #define BPER (1ul << BPERBITS) #define SI static inline // TODO need to guarantee alignment of the pool static char pool[RANK_0_SIZE << (RANKS - 1)]; static const size_t pool_size = sizeof pool; static unsigned counts[RANKS] = { [RANKS - 1] = 1 }; // implicit binary tree of nodes // node at n has children at (2n + 1) and (2n + 2) // bit0 : 0=leaf, 1=internal // bit1 : full static uint32_t nodes[(1ul << RANKS) / NPER - (1 / NPER)]; typedef uint32_t np; typedef size_t sz; #define TN 0 /* top node */ // ----------------------------------------------------------------------------- static inline unsigned ilog2(unsigned long x) { unsigned result = 0; while (x >>= 1) result++; return result; } SI sz IRANK (sz r) { return RANKS - 1 - r; } SI np LLINK (np n) { return (n << 1) + 1; } SI np RLINK (np n) { return (n << 1) + 2; } SI sz ISLEFT (np n) { return n & 1; } SI sz ISRIGHT (np n) { return !ISLEFT(n); } SI np PARENT (np n) { return (n - 1) >> 1; } SI np SIBLING (np n) { return n + ISLEFT(n) - ISRIGHT(n); } SI sz NODE2RANK (np n) { return (n ? NODE2RANK(PARENT(n)) : RANKS) - 1; } SI sz SIZE2BLKS (sz x) { return (x + RANK_0_SIZE - 1) >> RANK_0_LOG; } SI sz SIZE2RANK (sz s) { return s ? ilog2(SIZE2BLKS(s) - 1) + 1 : 0; } SI sz RANK2BYTES(sz r) { return 1ul << (r + RANK_0_LOG); } SI sz NODE2BASE (np n) { return n >> NPERBITS; } SI sz NODE2SH (np n) { return n & (NPER - 1); } SI sz GET_COUNT(sz r) { return counts[r]; } SI void SET_COUNT(sz r, sz v) { counts[r] = v; } SI sz SHAMT(np n, sz b) { return NODE2SH(n) * BPER + b; } SI uint32_t* NODE2PTR(np n) { return &nodes[NODE2BASE(n)]; } SI sz GET_BIT (np n, sz b) { return (*NODE2PTR(n) >> SHAMT(n, b)) & 1; } SI void CLR_BIT (np n, sz b) { *NODE2PTR(n) &= ~(1ul << SHAMT(n,b)); } SI void SET_BIT (np n, sz b) { *NODE2PTR(n) |= (1ul << SHAMT(n,b)); } SI sz GET_LEAF (np n ) { return !GET_BIT(n, 0); } SI sz GET_FULL (np n ) { return GET_BIT(n, 1); } SI sz GET_VALID(np n ) { return n == TN || !GET_LEAF(PARENT(n)); } SI void SET_LEAF (np n, sz v) { !v ? SET_BIT(n, 0) : CLR_BIT(n, 0); } SI void SET_FULL (np n, sz v) { v ? SET_BIT(n, 1) : CLR_BIT(n, 1); } static char *NODE2ADDR(np n) { size_t base = 0; int i = NODE2RANK(n) + RANK_0_LOG; while (n != TN) { base |= ISRIGHT(n) << i; n = PARENT(n); i++; } return pool + base; } static np ADDR2NODE(char *key) { // We start at the second-highest rank because we are always checking the // key against the midpoint of the current-rank node, which midpoint is // calculated using the size of the next-smallest-rank node. size_t rank = RANKS - 2; np n = TN; char *base = pool; while (!GET_LEAF(n)) { if (key - base >= (signed)RANK2BYTES(rank)) { n = RLINK(n); base += RANK2BYTES(rank); } else { n = LLINK(n); } rank--; } // Now we are at a leaf. Either it matches our key, or we bail. if (key != base) abort(); return n; } // ----------------------------------------------------------------------------- static void buddy_splitnode(np n, sz rank) { assert(("node is legally splittable", GET_LEAF(n))); // remove one node of our rank and create two of a smaller rank if (!GET_FULL(n)) SET_COUNT(rank, GET_COUNT(rank) - 1); SET_COUNT(rank - 1, GET_COUNT(rank - 1) + 2); SET_LEAF(n,0); // Both children are already empty leaf nodes, either because they have // never been written, or because buddy_{free,realloc} left them that way. } static void *buddy_alloc(size_t rank) { for (size_t r = rank; r < RANKS; r++) { if (!GET_COUNT(r)) continue; size_t rp = IRANK(r); size_t first = (1ul << rp) - 1; for (np n = first; n < first + (1ul << rp); n++) { if (!GET_VALID(n) || !GET_LEAF(n) || GET_FULL(n)) continue; while (r > rank) { buddy_splitnode(n, r--); n = LLINK(n); } SET_FULL(n,1); SET_COUNT(r, GET_COUNT(r) - 1); return NODE2ADDR(n); } } // if we make it here, there were no nodes of appropriate size or larger errno = ENOMEM; return NULL; } // ----------------------------------------------------------------------------- // External API void *buddy_malloc(size_t size) { return buddy_alloc(SIZE2RANK(size)); } void *buddy_calloc(size_t nelem, size_t width) { return memset(buddy_malloc(nelem * width), 0, nelem * width); } void buddy_free(void *orig) { if (!orig) return; np n = ADDR2NODE(orig); assert(("freeing node is a leaf", GET_LEAF(n) == 1)); SET_FULL(n,0); size_t rank = NODE2RANK(n); assert(("rank is valid", rank < RANKS && rank >= 0)); SET_COUNT(rank, GET_COUNT(rank) + 1); // mark self free assert(("count is legal", GET_COUNT(rank) < (1ul << (IRANK(rank) + 1)))); if (n == TN) return; np p = PARENT(n); np b = SIBLING(n); while (GET_LEAF(b) && !GET_FULL(b)) { // TODO write this recursively too ? assert(("count will not underflow", GET_COUNT(rank) >= 2)); SET_COUNT(rank, GET_COUNT(rank) - 2); // b and n are disappearing assert(("count legal", GET_COUNT(rank + 1) < (2u << IRANK(rank + 1)))); SET_COUNT(rank + 1, GET_COUNT(rank + 1) + 1); // parent appears as leaf rank++; SET_LEAF(p,1); SET_FULL(p,0); n = p; if (n == TN) break; p = PARENT(n); assert(("parent is valid", GET_VALID(p))); b = SIBLING(n); assert(("sibling is valid", GET_VALID(b))); } } void *buddy_realloc(void *orig, size_t size) { if (!size) { buddy_free(orig); return NULL; } if (!orig) return buddy_malloc(size); void *what = orig; np n = ADDR2NODE(what); size_t rank = NODE2RANK(n); size_t max = RANK2BYTES(rank); if (size > max) { if (n == TN || size >= pool_size) { errno = ENOMEM; return NULL; } #if !NO_PROMOTE // Without the ability to promote and demote existing allocation sizes, // we drop from 70% utilisation to 50%. Demotion seems more important // than promotion, but that may be an effect of the allocation pattern. size_t get = max; np p = PARENT(n), b = SIBLING(n); while (GET_LEAF(b) && !GET_FULL(b) && (get <<= 1) < size && p != TN) { b = SIBLING(p); p = PARENT(p); } if (get >= pool_size) { errno = ENOMEM; return NULL; } if (get >= size) { // can promote existing allocation do { np q = PARENT(n); SET_LEAF(q,1); SET_FULL(q,1); // Set node empty even though invalid, so future allocations // can depend on it without setting it explicitly. SET_FULL(n,0); n = q; // coalescing a full and an empty SET_COUNT(rank, GET_COUNT(rank) - 1); rank++; } while (n != p); what = NODE2ADDR(n); memmove(what, orig, max); } else #endif { what = buddy_alloc(SIZE2RANK(size)); if (!what) return NULL; // ENOMEM is already set memcpy(what, orig, max); buddy_free(orig); } } #if !NO_DEMOTE else while (rank > 0 && size <= (max >>= 1)) { // demote node buddy_splitnode(n, rank--); // claim the left child of the newly split node n = LLINK(n); SET_FULL(n,1); SET_COUNT(rank, GET_COUNT(rank) - 1); } #endif return what; }