From 51e4b8d7068a3489fb5cb45a8ebcc20036fd0d35 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Wed, 11 Apr 2007 00:30:34 +0000 Subject: r12338@catbus: nickm | 2007-04-10 20:29:05 -0400 Document memory pool implementation, and tweak it even mor. See? Programming is fun. svn:r9940 --- ChangeLog | 2 + doc/TODO | 7 +- src/common/mempool.c | 247 +++++++++++++++++++++++++++++++++++++-------------- src/common/mempool.h | 27 ++++-- src/or/relay.c | 14 +-- src/or/test.c | 4 +- 6 files changed, 219 insertions(+), 82 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3127ecdeb..7b90d0b14 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,8 @@ Changes in version 0.2.0.1-alpha - 2007-??-?? queue for each circuit. This lets us use less slack memory, and will eventually let us be smarter about prioritizing different kinds of traffic. + - Allocate cells in memory pools better speed and memory efficiency, + especially on platforms where malloc() is inefficient. - Stop reading on edge connections when their corresponding circuit buffers are full; start again as the circuits empty out. diff --git a/doc/TODO b/doc/TODO index b3daeb59f..2087b7600 100644 --- a/doc/TODO +++ b/doc/TODO @@ -75,7 +75,12 @@ Things we'd like to do in 0.2.0.x: [No need to do this: the edge-connection choking will already take care of this a bit, and rewriting the 'bridged connection' code to not use socketpairs will give us even more control.] - - Do we switch to arena-allocation for cells? + . Do we switch to pool-allocation for cells? + o Implement pool-allocation + o Have Tor use it for packed cells. + o Document it. + - Do something smart with freeing unused chunks. + - Benchmark pool-allocation vs straightforward malloc. - Can we stop doing so many memcpys on cells? o Also, only package data from exitconns when there is space on the target OR conn's outbuf? or when the circuit is not too full. diff --git a/src/common/mempool.c b/src/common/mempool.c index 39bcbfc70..b3c375253 100644 --- a/src/common/mempool.c +++ b/src/common/mempool.c @@ -1,6 +1,6 @@ /* Copyright 2007 Nick Mathewson */ /* See LICENSE for licensing information */ -/* $Id: /tor/trunk/src/common/util.c 12153 2007-03-12T03:11:12.797278Z nickm $ */ +/* $Id$ */ #include #include @@ -8,10 +8,48 @@ #include "mempool.h" /* OVERVIEW: - * DOCDOC - */ - -/* DRAWBACKS: + * + * This is an implementation of memory pools for Tor cells. It may be + * useful for you too. + * + * Generally, a memory pool is an allocation strategy optimized for large + * numbers of identically-sized objects. Rather than the elaborate arena + * and coalescing strategeis you need to get good performance for a + * general-purpose malloc(), pools use a series of large memory "chunks", + * each of which is carved into a bunch of smaller "items" or + * "allocations". + * + * To get decent performance, you need to: + * - Minimize the number of times you hit the underlying allocator. + * - Try to keep accesses as local in memory as possible. + * - Try to keep the common case fast. + * + * Our implementation uses three lists of chunks per pool. Each chunk can + * be either "full" (no more room for items); "empty" (no items); or + * "used" (not full, not empty). There are independent doubly-linked + * lists for each state. + * + * CREDIT: + * + * I wrote this after looking at 3 or 4 other pooling allocators, but + * without copying. The strategy this most resembles (which is funny, + * since that's the one I looked at longest ago) the pool allocator + * underlying Python's obmalloc code. Major differences from obmalloc's + * pools are: + * - We don't even try to be threadsafe. + * - We only handle objects of one size. + * - Our list of empty chunks is doubly-linked, not singly-linked. + * (This could change pretty easily; it's only doubly-linked for + * consistency.) + * - We keep a list of full chunks (so we can have a "nuke everything" + * function). Obmalloc's pools leave full chunks to float unanchored. + * + * [XXXX020 Another way to support 'nuke everything' would be to keep + * _all_ the chunks in a doubly-linked-list. This would have more + * space overhead per chunk, but less pointer manipulation overhead + * than the current approach.] + * + * LIMITATIONS: * - Not even slightly threadsafe. * - Likes to have lots of items per chunks. * - One pointer overhead per allocated thing. (The alternative is @@ -24,12 +62,6 @@ * - probably, chunks should always be a power of 2. */ -/* NOTES: - * - The algorithm is similar to the one used by Python, but assumes that - * we'll know in advance which objects we want to pool, and doesn't - * try to handle a zillion objects of weird different sizes. - */ - #if 1 /* Tor dependencies */ #include "orconfig.h" @@ -39,8 +71,12 @@ #define ALLOC(x) tor_malloc(x) #define FREE(x) tor_free(x) #define ASSERT(x) tor_assert(x) +#undef ALLOC_CAN_RETURN_NULL /* End Tor dependencies */ #else +/* If you're not building this as part of Tor, you'll want to define the + * following macros. For now, these should do as defaults. + */ #include #define PREDICT_UNLIKELY(x) (x) #define PREDICT_LIKELY(x) (x) @@ -49,63 +85,90 @@ #define STRUCT_OFFSET(tp, member) \ ((off_t) (((char*)&((tp*)0)->member)-(char*)0)) #define ASSERT(x) assert(x) +#define ALLOC_CAN_RETURN_NULL #endif /* Tuning parameters */ -/** DOCDOC */ +/** Largest type that we need to ensure returned memory items are aligned to. + * Change this to "double" if we need to be safe for structs with doubles. */ #define ALIGNMENT_TYPE void * -/** DOCDOC */ -#define ALIGNMENT sizeof(void*) -/** DOCDOC */ +/** Increment that we need to align allocated */ +#define ALIGNMENT sizeof(ALIGNMENT_TYPE) +/** Largest memory chunk that we should allocate. */ #define MAX_CHUNK (8*(1L<<20)) -/** DOCDOC */ +/** Smallest memory chunk size that we should allocate. */ #define MIN_CHUNK 4096 typedef struct mp_allocated_t mp_allocated_t; +typedef struct mp_chunk_t mp_chunk_t; -/** DOCDOC */ +/** Holds a single allocated item, allocated as part of a chunk. */ struct mp_allocated_t { + /** The chunk that this item is allocated in. This adds overhead to each + * allocated item, thus making this implementation inappropriate for + * very small items. */ mp_chunk_t *in_chunk; union { + /** If this item is free, the next item on the free list. */ mp_allocated_t *next_free; + /** If this item is not free, the actual memory contents of this item. + * (Not actual size.) */ char mem[1]; + /** An extra element to the union to insure correct alignment. */ ALIGNMENT_TYPE _dummy; }; }; -/** DOCDOC */ +/** 'Magic' value used to detect memory corruption. */ +#define MP_CHUNK_MAGIC 0x09870123 + +/** A chunk of memory. Chunks come from malloc; we use them */ struct mp_chunk_t { - unsigned long magic; - mp_chunk_t *next; - mp_chunk_t *prev; - mp_pool_t *pool; + unsigned long magic; /**< Must be MP_CHUNK_MAGIC if this chunk is valid. */ + mp_chunk_t *next; /**< The next free, used, or full chunk in sequence. */ + mp_chunk_t *prev; /**< The previous free, used, or full chunk in sequence. */ + mp_pool_t *pool; /**< The pool that this chunk is part of */ + /** First free item in the freelist for this chunk. Note that this may be + * NULL even if this chunk is not at capacity: if so, the free memory at + * next_mem has not yet been carved into items. + */ mp_allocated_t *first_free; - int n_allocated; - int capacity; - size_t mem_size; - char *next_mem; - char mem[1]; + int n_allocated; /**< Number of currently allocated items in this chunk */ + int capacity; /**< Largest number of items that can be fit into this chunk */ + size_t mem_size; /**< Number of usable bytes in mem. */ + char *next_mem; /**< Pointer into part of mem not yet carved up. */ + char mem[1]; /**< Storage for this chunk. (Not actual size.) */ }; -/** DOCDOC */ -#define MP_CHUNK_MAGIC 0x09870123 -/** DOCDOC */ +/** Number of extra bytes needed beyond mem_size to allocate a chunk. */ #define CHUNK_OVERHEAD (sizeof(mp_chunk_t)-1) -/** DOCDOC */ +/** Given a pointer to a mp_allocated_t, return a pointer to the memory + * item it holds. */ #define A2M(a) (&(a)->mem[0]) -/** DOCDOC */ +/** Given a pointer to a memory_item_t, return a pointer to its enclosing + * mp_allocated_t. */ #define M2A(p) ( ((char*)p) - STRUCT_OFFSET(mp_allocated_t, mem) ) -/* INVARIANT: every chunk can hold 2 or more items. */ +#ifdef ALLOC_CAN_RETURN_NULL +/** If our ALLOC() macro can return NULL, check whether x is NULL, + * and if so, return NULL. */ +#define CHECK_ALLOC(x) \ + if (PREDICT_UNLIKELY(!x)) { return NULL; } +#else +/** If our ALLOC() macro can't return NULL, do nothing. */ +#define CHECK_ALLOC(x) +#endif -/** DOCDOC */ +/** Helper: Allocate and return a new memory chunk for pool. Does not + * link the chunk into any list. */ static mp_chunk_t * mp_chunk_new(mp_pool_t *pool) { size_t sz = pool->new_chunk_capacity * pool->item_alloc_size; mp_chunk_t *chunk = ALLOC(CHUNK_OVERHEAD + sz); + CHECK_ALLOC(chunk); memset(chunk, 0, sizeof(mp_chunk_t)); /* Doesn't clear the whole thing. */ chunk->magic = MP_CHUNK_MAGIC; chunk->capacity = pool->new_chunk_capacity; @@ -115,29 +178,44 @@ mp_chunk_new(mp_pool_t *pool) return chunk; } -/** DOCDOC */ +/** Return an newly allocated item from pool. */ void * mp_pool_get(mp_pool_t *pool) { mp_chunk_t *chunk; mp_allocated_t *allocated; + if (PREDICT_LIKELY(pool->used_chunks != NULL)) { + /* Common case: there is some chunk that is neither full nor empty. Use + * that one. (We can't use the full ones, obviously, and we should fill + * up the used ones before we start on any empty ones. */ chunk = pool->used_chunks; + } else if (pool->empty_chunks) { - /* Put the most recently emptied chunk on the used list. */ + /* We have no used chunks, but we have an empty chunk that we haven't + * freed yet: use that. (We pull from the front of the list, which should + * get us the most recently emptied chunk.) */ chunk = pool->empty_chunks; + + /* Remove the chunk from the empty list. */ pool->empty_chunks = chunk->next; if (chunk->next) chunk->next->prev = NULL; + + /* Put the chunk on the 'used' list*/ chunk->next = pool->used_chunks; if (chunk->next) chunk->next->prev = chunk; pool->used_chunks = chunk; + ASSERT(!chunk->prev); --pool->n_empty_chunks; } else { - /* Allocate a new chunk and add it to the used list. */ + /* We have no used or empty chunks: allocate a new chunk. */ chunk = mp_chunk_new(pool); + CHECK_ALLOC(chunk); + + /* Add the new chunk to the used list. */ chunk->next = pool->used_chunks; if (chunk->next) chunk->next->prev = chunk; @@ -148,40 +226,52 @@ mp_pool_get(mp_pool_t *pool) ASSERT(chunk->n_allocated < chunk->capacity); if (chunk->first_free) { + /* If there's anything on the chunk's freelist, unlink it and use it. */ allocated = chunk->first_free; chunk->first_free = allocated->next_free; - allocated->next_free = NULL; /* debugging */ + allocated->next_free = NULL; /* For debugging; not really needed. */ + ASSERT(allocated->in_chunk == chunk); } else { + /* Otherwise, the chunk had better have some free space left on it. */ ASSERT(chunk->next_mem + pool->item_alloc_size <= chunk->mem + chunk->mem_size); + + /* Good, it did. Let's carve off a bit of that free space, and use + * that. */ allocated = (void*)chunk->next_mem; chunk->next_mem += pool->item_alloc_size; allocated->in_chunk = chunk; + allocated->next_free = NULL; /* For debugging; not really needed. */ } ++chunk->n_allocated; + if (PREDICT_UNLIKELY(chunk->n_allocated == chunk->capacity)) { - /* This is now a full chunk. */ + /* This chunk just became full. */ ASSERT(chunk == pool->used_chunks); ASSERT(chunk->prev == NULL); + + /* Take it off the used list. */ pool->used_chunks = chunk->next; if (chunk->next) chunk->next->prev = NULL; + /* Put it on the full list. */ chunk->next = pool->full_chunks; if (chunk->next) chunk->next->prev = chunk; pool->full_chunks = chunk; } + /* And return the memory portion of the mp_allocated_t. */ return A2M(allocated); } -/** DOCDOC */ +/** Return an allocated memory item to its memory pool. */ void -mp_pool_release(void *_item) +mp_pool_release(void *item) { - mp_allocated_t *allocated = (void*) M2A(_item); + mp_allocated_t *allocated = (void*) M2A(item); mp_chunk_t *chunk = allocated->in_chunk; ASSERT(chunk); @@ -194,7 +284,7 @@ mp_pool_release(void *_item) if (PREDICT_UNLIKELY(chunk->n_allocated == chunk->capacity)) { /* This chunk was full and is about to be used. */ mp_pool_t *pool = chunk->pool; - /* unlink from full */ + /* unlink from the full list */ if (chunk->prev) chunk->prev->next = chunk->next; if (chunk->next) @@ -202,7 +292,7 @@ mp_pool_release(void *_item) if (chunk == pool->full_chunks) pool->full_chunks = chunk->next; - /* link to used */ + /* link to the used list. */ chunk->next = pool->used_chunks; chunk->prev = NULL; if (chunk->next) @@ -211,7 +301,8 @@ mp_pool_release(void *_item) } else if (PREDICT_UNLIKELY(chunk->n_allocated == 1)) { /* This was used and is about to be empty. */ mp_pool_t *pool = chunk->pool; - /* unlink from used */ + + /* Unlink from the used list */ if (chunk->prev) chunk->prev->next = chunk->next; if (chunk->next) @@ -219,23 +310,26 @@ mp_pool_release(void *_item) if (chunk == pool->used_chunks) pool->used_chunks = chunk->next; - /* link to empty */ + /* Link to the empty list */ chunk->next = pool->empty_chunks; chunk->prev = NULL; if (chunk->next) chunk->next->prev = chunk; pool->empty_chunks = chunk; - /* reset guts to defragment this chunk. */ + /* Reset the guts of this chunk to defragment it, in case it gets + * used again. */ chunk->first_free = NULL; chunk->next_mem = chunk->mem; ++pool->n_empty_chunks; } + --chunk->n_allocated; } -/** DOCDOC */ +/** Allocate a new memory pool to hold items of size item_size. We'll + * try to fit about chunk_capacity items in each chunk. */ mp_pool_t * mp_pool_new(size_t item_size, size_t chunk_capacity) { @@ -243,29 +337,35 @@ mp_pool_new(size_t item_size, size_t chunk_capacity) size_t alloc_size; pool = ALLOC(sizeof(mp_pool_t)); + CHECK_ALLOC(pool); memset(pool, 0, sizeof(mp_pool_t)); - /* First, minimal size with overhead. */ + /* First, we figure out how much space to allow per item. We'll want to + * use make sure we have enough for the overhead plus the item size. */ alloc_size = STRUCT_OFFSET(mp_allocated_t, mem) + item_size; + /* If the item_size is less than sizeof(next_free), we need to make + * the allocation bigger. */ if (alloc_size < sizeof(mp_allocated_t)) alloc_size = sizeof(mp_allocated_t); - /* Then, round up to alignment. */ + /* If we're not an even multiple of ALIGNMENT, round up. */ if (alloc_size % ALIGNMENT) { alloc_size = alloc_size + ALIGNMENT - (alloc_size % ALIGNMENT); } if (alloc_size < ALIGNMENT) alloc_size = ALIGNMENT; - ASSERT((alloc_size % ALIGNMENT) == 0); + /* Now we figure out how many items fit in each chunk. We need to fit at + * least 2 items per chunk. No chunk can be more than MAX_CHUNK bytes long, + * or less than MIN_CHUNK. */ + /* XXXX020 Try a bit harder here: we want to be a bit less than a power of + 2, not a bit over. */ if (chunk_capacity > MAX_CHUNK) chunk_capacity = MAX_CHUNK; - if (chunk_capacity < alloc_size * 2 + CHUNK_OVERHEAD) chunk_capacity = alloc_size * 2 + CHUNK_OVERHEAD; - - if (chunk_capacity < MIN_CHUNK) /* Guess system page size. */ + if (chunk_capacity < MIN_CHUNK) chunk_capacity = MIN_CHUNK; pool->new_chunk_capacity = (chunk_capacity-CHUNK_OVERHEAD) / alloc_size; @@ -274,23 +374,33 @@ mp_pool_new(size_t item_size, size_t chunk_capacity) return pool; } -/** DOCDOC */ +/** If there are more than n empty chunks in pool, free the + * exces ones that have been empty for the longest. */ void -mp_pool_clean(mp_pool_t *pool) +mp_pool_clean(mp_pool_t *pool, int n) { - if (pool->empty_chunks) { - mp_chunk_t *next, *chunk = pool->empty_chunks->next; - while (chunk) { - next = chunk->next; - FREE(chunk); - chunk = next; - } - pool->empty_chunks->next = NULL; - pool->n_empty_chunks = 1; + mp_chunk_t *chunk, **first_to_free; + first_to_free = &pool->empty_chunks; + while (*first_to_free && n > 0) { + first_to_free = &(*first_to_free)->next; + --n; + } + if (!*first_to_free) + return; + + chunk = *first_to_free; + while (chunk) { + mp_chunk_t *next = chunk->next; + chunk->magic = 0xdeadbeef; + FREE(chunk); + --pool->n_empty_chunks; + chunk = next; } + + *first_to_free = NULL; } -/** DOCDOC */ +/** Helper: Given a list of chunks, free all the chunks in the list. */ static void destroy_chunks(mp_chunk_t *chunk) { @@ -303,7 +413,8 @@ destroy_chunks(mp_chunk_t *chunk) } } -/** DOCDOC */ +/** Free all space held in pool This makes all pointers returned from + * mp_pool_get(pool) invalid. */ void mp_pool_destroy(mp_pool_t *pool) { @@ -314,6 +425,7 @@ mp_pool_destroy(mp_pool_t *pool) FREE(pool); } +/** Helper: make sure that a given chunk list is not corrupt. */ static int assert_chunks_ok(mp_pool_t *pool, mp_chunk_t *chunk, int empty, int full) { @@ -353,6 +465,7 @@ assert_chunks_ok(mp_pool_t *pool, mp_chunk_t *chunk, int empty, int full) return n; } +/** Fail with an assertion if pool is not internally consistent. */ void mp_pool_assert_ok(mp_pool_t *pool) { diff --git a/src/common/mempool.h b/src/common/mempool.h index 2ee32fb13..a1bc3bd0d 100644 --- a/src/common/mempool.h +++ b/src/common/mempool.h @@ -1,6 +1,6 @@ /* Copyright 2007 Nick Mathewson */ /* See LICENSE for licensing information */ -/* $Id: /tor/trunk/src/common/util.c 12153 2007-03-12T03:11:12.797278Z nickm $ */ +/* $Id$ */ /** * \file util.h @@ -10,25 +10,38 @@ #ifndef MEMPOOL_H #define MEMPOOL_H +/** A memory pool is a context in which a large number of fixed-sized +* objects can be allocated efficiently. See mempool.c for implementation +* details. */ typedef struct mp_pool_t mp_pool_t; void *mp_pool_get(mp_pool_t *pool); void mp_pool_release(void *item); mp_pool_t *mp_pool_new(size_t item_size, unsigned int n_per_chunk); -void mp_pool_clean(mp_pool_t *pool); +void mp_pool_clean(mp_pool_t *pool, int n); void mp_pool_destroy(mp_pool_t *pool); void mp_pool_assert_ok(mp_pool_t *pool); #ifdef MEMPOOL_PRIVATE -typedef struct mp_chunk_t mp_chunk_t; +/* These declarations are only used by mempool.c and test.c */ -/** DOCDOC */ struct mp_pool_t { - mp_chunk_t *empty_chunks; - mp_chunk_t *used_chunks; - mp_chunk_t *full_chunks; + /** Doubly-linked list of chunks in which no items have been allocated. + * The front of the list is the most recently emptied chunk. */ + struct mp_chunk_t *empty_chunks; + /** Doubly-linked list of chunks in which some items have been allocated, + * but which are not yet full. The front of the list is the chunk that has + * most recently been modified. */ + struct mp_chunk_t *used_chunks; + /** Doubly-linked list of chunks in which no more items can be allocated. + * The front of the list is the chunk that has most recently become full. */ + struct mp_chunk_t *full_chunks; + /** Length of empty_chunks. */ int n_empty_chunks; + /** Size of each chunk (in items). */ int new_chunk_capacity; + /** Size to allocate for each item, including overhead and alignment + * padding. */ size_t item_alloc_size; }; #endif diff --git a/src/or/relay.c b/src/or/relay.c index 4fd664811..491b7a9ce 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -1479,8 +1479,8 @@ circuit_consider_sending_sendme(circuit_t *circ, crypt_path_t *layer_hint) #endif #ifdef ENABLE_CELL_POOL -static mp_pool_t *cell_pool; -/* DOCDOC */ +static mp_pool_t *cell_pool = NULL; +/** Allocate structures to hold cells. */ void init_cell_pool(void) { @@ -1488,7 +1488,7 @@ init_cell_pool(void) cell_pool = mp_pool_new(sizeof(packed_cell_t), 64); } -/* DOCDOC */ +/** Free all storage used to hold cells. */ void free_cell_pool(void) { @@ -1497,20 +1497,22 @@ free_cell_pool(void) cell_pool = NULL; } -/** Release storage held by cell */ +/** Release storage held by cell. */ static INLINE void packed_cell_free(packed_cell_t *cell) { mp_pool_release(cell); } -/* DOCDOC */ -static INLINE packed_cell_t* +/** Allocate and return a new packed_cell_t. */ +static INLINE packed_cell_t * packed_cell_alloc(void) { return mp_pool_get(cell_pool); } #else +/* ENABLE_CELL_POOL isn't defined: here are some stubs to use tor_malloc() + * and tor_free() instead. */ void init_cell_pool(void) { diff --git a/src/or/test.c b/src/or/test.c index 7294e413a..9b427cfa7 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -2142,13 +2142,15 @@ test_mempool(void) //mp_pool_assert_ok(pool); } if (crypto_rand_int(777)==0) - mp_pool_clean(pool); + mp_pool_clean(pool, 2); if (i % 777) mp_pool_assert_ok(pool); } SMARTLIST_FOREACH(allocated, void *, m, mp_pool_release(m)); mp_pool_assert_ok(pool); + mp_pool_clean(pool, 0); + mp_pool_assert_ok(pool); mp_pool_destroy(pool); smartlist_free(allocated); } -- cgit v1.2.3