/*- * See the file LICENSE for redistribution information. * * Copyright (c) 1996,2007 Oracle. All rights reserved. * * $Id: env_alloc.c,v 12.17 2007/05/22 18:35:39 ubell Exp $ */ #include "db_config.h" #include "db_int.h" /* * Implement shared memory region allocation. The initial list is a single * memory "chunk" which is carved up as memory is requested. Chunks are * coalesced when free'd. We maintain two linked-lists of chunks: a list of * all chunks sorted by address, and a list of free chunks sorted by size. * * The ALLOC_LAYOUT structure is the governing structure for the chunk. * * The ALLOC_ELEMENT structure is the structure that describes any single * chunk of memory, and is immediately followed by the user's memory. * * The internal memory chunks are always aligned to a uintmax_t boundary so * we don't drop core accessing the fields of the ALLOC_ELEMENT structure. * * The memory chunks returned to the user are aligned to a uintmax_t boundary. * This is enforced by terminating the ALLOC_ELEMENT structure with a uintmax_t * field as that immediately precedes the user's memory. * * Any caller needing more than uintmax_t alignment is responsible for doing * the work themselves. */ typedef struct __alloc_layout { SH_TAILQ_HEAD(__addrq) addrq; /* Sorted by address */ SH_TAILQ_HEAD(__sizeq) sizeq; /* Sorted by size */ #ifdef HAVE_STATISTICS u_int32_t success; /* Successful allocations */ u_int32_t failure; /* Failed allocations */ u_int32_t freed; /* Free calls */ u_int32_t longest; /* Longest chain walked */ #ifdef __ALLOC_DISPLAY_ALLOCATION_SIZES u_int32_t pow2_size[32]; /* Allocation size tracking */ #endif #endif uintmax_t unused; /* Guarantee alignment */ } ALLOC_LAYOUT; typedef struct __alloc_element { SH_TAILQ_ENTRY addrq; /* List by address */ SH_TAILQ_ENTRY sizeq; /* List by size */ /* * The "len" field is the total length of the chunk, not the size * available to the caller. */ size_t len; /* Chunk length */ /* * The "ulen" field is the length returned to the caller. * * Set to 0 if the chunk is not currently in use. */ uintmax_t ulen; /* User's length */ } ALLOC_ELEMENT; /* * __env_alloc_init -- * Initialize the area as one large chunk. * * PUBLIC: void __env_alloc_init __P((REGINFO *, size_t)); */ void __env_alloc_init(infop, size) REGINFO *infop; size_t size; { DB_ENV *dbenv; ALLOC_ELEMENT *elp; ALLOC_LAYOUT *head; dbenv = infop->dbenv; /* No initialization needed for heap memory regions. */ if (F_ISSET(dbenv, DB_ENV_PRIVATE)) return; /* * The first chunk of memory is the ALLOC_LAYOUT structure. */ head = infop->addr; SH_TAILQ_INIT(&head->addrq); SH_TAILQ_INIT(&head->sizeq); STAT((head->success = head->failure = head->freed = 0)); COMPQUIET(head->unused, 0); /* * The rest of the memory is the first available chunk. */ elp = (ALLOC_ELEMENT *)((u_int8_t *)head + sizeof(ALLOC_LAYOUT)); elp->len = size - sizeof(ALLOC_LAYOUT); elp->ulen = 0; SH_TAILQ_INSERT_HEAD(&head->addrq, elp, addrq, __alloc_element); SH_TAILQ_INSERT_HEAD(&head->sizeq, elp, sizeq, __alloc_element); } /* * The length, the ALLOC_ELEMENT structure and an optional guard byte, * rounded up to standard alignment. */ #ifdef DIAGNOSTIC #define DB_ALLOC_SIZE(len) \ (size_t)DB_ALIGN((len) + sizeof(ALLOC_ELEMENT) + 1, sizeof(uintmax_t)) #else #define DB_ALLOC_SIZE(len) \ (size_t)DB_ALIGN((len) + sizeof(ALLOC_ELEMENT), sizeof(uintmax_t)) #endif /* * __env_alloc_overhead -- * Return the overhead needed for an allocation. * * PUBLIC: size_t __env_alloc_overhead __P((void)); */ size_t __env_alloc_overhead() { return (sizeof(ALLOC_ELEMENT)); } /* * __env_alloc_size -- * Return the space needed for an allocation, including alignment. * * PUBLIC: size_t __env_alloc_size __P((size_t)); */ size_t __env_alloc_size(len) size_t len; { return (DB_ALLOC_SIZE(len)); } /* * __env_alloc -- * Allocate space from the shared region. * * PUBLIC: int __env_alloc __P((REGINFO *, size_t, void *)); */ int __env_alloc(infop, len, retp) REGINFO *infop; size_t len; void *retp; { ALLOC_ELEMENT *elp, *frag, *elp_tmp; ALLOC_LAYOUT *head; DB_ENV *dbenv; size_t total_len; u_int8_t *p; int ret; #ifdef HAVE_STATISTICS u_long st_search; #endif dbenv = infop->dbenv; *(void **)retp = NULL; /* * In a heap-backed environment, we call malloc for additional space. * (Malloc must return memory correctly aligned for our use.) * * In a heap-backed environment, memory is laid out as follows: * * { size_t total-length } { user-memory } { guard-byte } */ if (F_ISSET(dbenv, DB_ENV_PRIVATE)) { /* Check if we're over the limit. */ if (infop->allocated >= infop->max_alloc) return (ENOMEM); /* We need an additional size_t to hold the length. */ len += sizeof(size_t); #ifdef DIAGNOSTIC /* Plus one byte for the guard byte. */ ++len; #endif /* Allocate the space. */ if ((ret = __os_malloc(dbenv, len, &p)) != 0) return (ret); infop->allocated += len; *(size_t *)p = len; #ifdef DIAGNOSTIC p[len - 1] = GUARD_BYTE; #endif *(void **)retp = p + sizeof(size_t); return (0); } head = infop->addr; total_len = DB_ALLOC_SIZE(len); #ifdef __ALLOC_DISPLAY_ALLOCATION_SIZES STAT((++head->pow2_size[__db_log2(len) - 1])); #endif /* * If the chunk can be split into two pieces, with the fragment holding * at least 64 bytes of memory, we divide the chunk into two parts. */ #define SHALLOC_FRAGMENT (sizeof(ALLOC_ELEMENT) + 64) /* * Walk the size queue, looking for the smallest slot that satisfies * the request. */ elp = NULL; STAT((st_search = 0)); SH_TAILQ_FOREACH(elp_tmp, &head->sizeq, sizeq, __alloc_element) { STAT((++st_search)); if (elp_tmp->len < total_len) break; elp = elp_tmp; /* * We might have a long list of chunks of the same size. Stop * looking if we won't fragment memory by picking the current * slot. */ if (elp->len - total_len <= SHALLOC_FRAGMENT) break; } #ifdef HAVE_STATISTICS if (head->longest < st_search) head->longest = st_search; #endif /* * If we don't find an element of the right size, we're done. */ if (elp == NULL) { STAT((++head->failure)); return (ENOMEM); } STAT((++head->success)); /* Pull the chunk off of the size queue. */ SH_TAILQ_REMOVE(&head->sizeq, elp, sizeq, __alloc_element); if (elp->len - total_len > SHALLOC_FRAGMENT) { frag = (ALLOC_ELEMENT *)((u_int8_t *)elp + total_len); frag->len = elp->len - total_len; frag->ulen = 0; elp->len = total_len; /* The fragment follows the chunk on the address queue. */ SH_TAILQ_INSERT_AFTER( &head->addrq, elp, frag, addrq, __alloc_element); /* * Insert the fragment into the appropriate place in the * size queue. */ SH_TAILQ_FOREACH(elp_tmp, &head->sizeq, sizeq, __alloc_element) if (elp_tmp->len < frag->len) break; if (elp_tmp == NULL) SH_TAILQ_INSERT_TAIL(&head->sizeq, frag, sizeq); else SH_TAILQ_INSERT_BEFORE(&head->sizeq, elp_tmp, frag, sizeq, __alloc_element); } p = (u_int8_t *)elp + sizeof(ALLOC_ELEMENT); elp->ulen = len; #ifdef DIAGNOSTIC p[len] = GUARD_BYTE; #endif *(void **)retp = p; return (0); } /* * __env_alloc_free -- * Free space into the shared region. * * PUBLIC: void __env_alloc_free __P((REGINFO *, void *)); */ void __env_alloc_free(infop, ptr) REGINFO *infop; void *ptr; { ALLOC_ELEMENT *elp, *elp_tmp; ALLOC_LAYOUT *head; DB_ENV *dbenv; size_t len; u_int8_t *p; dbenv = infop->dbenv; /* In a private region, we call free. */ if (F_ISSET(dbenv, DB_ENV_PRIVATE)) { /* Find the start of the memory chunk and its length. */ p = (u_int8_t *)((size_t *)ptr - 1); len = *(size_t *)p; infop->allocated -= len; #ifdef DIAGNOSTIC /* Check the guard byte. */ DB_ASSERT(dbenv, p[len - 1] == GUARD_BYTE); /* Trash the memory chunk. */ memset(p, CLEAR_BYTE, len); #endif __os_free(dbenv, p); return; } head = infop->addr; STAT((++head->freed)); p = ptr; elp = (ALLOC_ELEMENT *)(p - sizeof(ALLOC_ELEMENT)); #ifdef DIAGNOSTIC /* Check the guard byte. */ DB_ASSERT(dbenv, p[elp->ulen] == GUARD_BYTE); /* Trash the memory chunk. */ memset(p, CLEAR_BYTE, elp->len - sizeof(ALLOC_ELEMENT)); #endif /* Mark the memory as no longer in use. */ elp->ulen = 0; /* * Try and merge this chunk with chunks on either side of it. Two * chunks can be merged if they're contiguous and not in use. */ if ((elp_tmp = SH_TAILQ_PREV(&head->addrq, elp, addrq, __alloc_element)) != NULL && elp_tmp->ulen == 0 && (u_int8_t *)elp_tmp + elp_tmp->len == (u_int8_t *)elp) { /* * If we're merging the entry into a previous entry, remove the * current entry from the addr queue and the previous entry from * the size queue, and merge. */ SH_TAILQ_REMOVE(&head->addrq, elp, addrq, __alloc_element); SH_TAILQ_REMOVE(&head->sizeq, elp_tmp, sizeq, __alloc_element); elp_tmp->len += elp->len; elp = elp_tmp; } if ((elp_tmp = SH_TAILQ_NEXT(elp, addrq, __alloc_element)) != NULL && elp_tmp->ulen == 0 && (u_int8_t *)elp + elp->len == (u_int8_t *)elp_tmp) { /* * If we're merging the current entry into a subsequent entry, * remove the subsequent entry from the addr and size queues * and merge. */ SH_TAILQ_REMOVE(&head->addrq, elp_tmp, addrq, __alloc_element); SH_TAILQ_REMOVE(&head->sizeq, elp_tmp, sizeq, __alloc_element); elp->len += elp_tmp->len; } /* Find the correct slot in the size queue. */ SH_TAILQ_FOREACH(elp_tmp, &head->sizeq, sizeq, __alloc_element) if (elp->len >= elp_tmp->len) break; if (elp_tmp == NULL) SH_TAILQ_INSERT_TAIL(&head->sizeq, elp, sizeq); else SH_TAILQ_INSERT_BEFORE(&head->sizeq, elp_tmp, elp, sizeq, __alloc_element); } #ifdef HAVE_STATISTICS /* * __env_alloc_print -- * Display the lists of memory chunks. * * PUBLIC: void __env_alloc_print __P((REGINFO *, u_int32_t)); */ void __env_alloc_print(infop, flags) REGINFO *infop; u_int32_t flags; { #ifdef __ALLOC_DISPLAY_ALLOCATION_LISTS ALLOC_ELEMENT *elp; #endif ALLOC_LAYOUT *head; DB_ENV *dbenv; #ifdef __ALLOC_DISPLAY_ALLOCATION_SIZES DB_MSGBUF mb; int i; #endif dbenv = infop->dbenv; head = infop->addr; if (F_ISSET(dbenv, DB_ENV_PRIVATE)) return; __db_msg(dbenv, "Region allocations: %lu allocations, %lu failures, %lu frees, %lu longest", (u_long)head->success, (u_long)head->failure, (u_long)head->freed, (u_long)head->longest); if (!LF_ISSET(DB_STAT_ALL)) return; #ifdef __ALLOC_DISPLAY_ALLOCATION_SIZES /* * We don't normally display the allocation history by size, it's too * expensive to calculate. */ DB_MSGBUF_INIT(&mb); for (i = 0; i < 32; ++i) __db_msgadd(dbenv, &mb, "%s%2d/%lu", i == 0 ? "Allocations by power-of-two sizes: " : ", ", i + 1, (u_long)head->pow2_size[i]); DB_MSGBUF_FLUSH(dbenv, &mb); #endif #ifdef __ALLOC_DISPLAY_ALLOCATION_LISTS /* * We don't normally display the list of address/chunk pairs, a few * thousand lines of output is too voluminous for even DB_STAT_ALL. */ __db_msg(dbenv, "Allocation list by address: {chunk length, user length}"); SH_TAILQ_FOREACH(elp, &head->addrq, addrq, __alloc_element) __db_msg(dbenv, "\t%#lx {%lu, %lu}", P_TO_ULONG(elp), (u_long)elp->len, (u_long)elp->ulen); __db_msg(dbenv, "Allocation free list by size: {chunk length}"); SH_TAILQ_FOREACH(elp, &head->sizeq, sizeq, __alloc_element) __db_msg(dbenv, "\t%#lx {%lu}", P_TO_ULONG(elp), (u_long)elp->len); #endif return; } #endif