[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: malloc and alignment
From: |
Stefan Monnier |
Subject: |
Re: malloc and alignment |
Date: |
Fri, 27 Jun 2003 19:17:38 -0400 |
> I must have been unclear. Rather than keep the markbit s part of the
> object,
> keep a separate bitmap. In order to find the bit in the bitmap for a
> given
> object, you need to find (from the object's pointer) both the base
> address of the bitmap and the index of the bit in the bitmap.
>
> I understand the problem--what I don't understand is this solution:
>
> This is typically done by allocating an array of objects, of size 2^N
> bytes,
> such that the base of the array can be found by clearing the low-order
> bits
> of the object's pointer.
>
> I will try to understand it.
>
> But a good implementation of memalign should work around this problem.
>
> I am not convinced the problem can be solved this way. Anyway,
> memalign is a C library function, and typically needs to be tied
> in with malloc. We can't expect Emacs to always use our memalign.
Indeed. And `memalign' is not a standard function anyway, so we
need to provide a fallback implementation anyway.
> The idea of allocating a much larger superblock big enough for 20 of
> these blocks, then distributing the blocks individually, could be a
> good solution. That only wastes 5% of the space at most. And if the
> superblock is treated by malloc as a large allocation, it will
> probably always have good enough alignment. So you could detect that
> case, and use all 20 of the parts instead of just 19 of them. That way,
> there is usually no waste.
That's what I implemented. See the patch below.
If memalign is available, then the code can use it. It seems that glibc
has a pretty good implementation of memalign, so it's worth the effort.
Here is the memory use breakdown for floats on x86 with glibc:
- with current code:
we malloc (1020) and get 84 Lisp_Floats (of size 12b)
- with new code, malloc:
we malloc (16384) and get 15 * 125 = 1875 Lisp_Floats (of size 8b)
- with new code, memalign:
we memalign (16380) and get 16 * 124 = 1984 Lisp_Floats (of size 8b)
In all three cases, malloc (or memalign) itself does not use up much
extra memory (in the case of memalign, it uses up a lot of memory
if we memalign (16384) which is why I only memalign (16380).
The actual number of bytes per float used up in the three cases is:
- 12.14
- 8.74
- 8.26
I'd like to use the same scheme for cons cells (so as to get rid of the
markbit in Lisp_Object), so it's important to stay as close to 8bytes
as possible since that's what cons cells currently use.
> I'd like to already install part of the patch below: the part that
> introduces a new `mark' field in every Lisp_Misc object (the field
> is 1-bit wide and does not increase the size of the objects since
> it is taken from explicit padding). Any objection ?
> Ok with me.
Installed, together with a patch that makes buffers use the `size'
field (like other vectorlike objects) rather than the `name' field.
Stefan
PS: This patch has not been tested. It's extracted from my code where it
seems to work fine, but I might have missed a dependency with some other
local changes.
Index: lisp.h
===================================================================
RCS file: /cvsroot/emacs/emacs/src/lisp.h,v
retrieving revision 1.458
diff -u -u -b -r1.458 lisp.h
--- lisp.h 26 Jun 2003 23:15:08 -0000 1.458
+++ lisp.h 27 Jun 2003 23:12:26 -0000
@@ -1282,8 +1216,6 @@
/* Lisp floating point type */
struct Lisp_Float
{
- Lisp_Object type; /* essentially used for mark-bit
- and chaining when on free-list */
#ifdef HIDE_LISP_IMPLEMENTATION
double data_;
#else
Index: alloc.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/alloc.c,v
retrieving revision 1.307
diff -u -u -b -r1.307 alloc.c
--- alloc.c 27 Jun 2003 22:54:26 -0000 1.307
+++ alloc.c 27 Jun 2003 23:06:03 -0000
@@ -19,6 +19,7 @@
#include <config.h>
#include <stdio.h>
+#include <limits.h>
#ifdef ALLOC_DEBUG
#undef INLINE
@@ -418,8 +424,8 @@
/* Value is SZ rounded up to the next multiple of ALIGNMENT.
ALIGNMENT must be a power of 2. */
-#define ALIGN(SZ, ALIGNMENT) \
- (((SZ) + (ALIGNMENT) - 1) & ~((ALIGNMENT) - 1))
+#define ALIGN(ptr, ALIGNMENT) \
+ ((void*) ((((EMACS_UINT)(ptr)) + (ALIGNMENT) - 1) & ~((ALIGNMENT) - 1)))
@@ -635,6 +641,211 @@
UNBLOCK_INPUT;
}
+/* Allocation of aligned blocks of memory to store Lisp data. */
+/* The entry point is lisp_align_malloc which returns blocks of at most */
+/* BLOCK_BYTES and guarantees they are aligned on a BLOCK_ALIGN boundary. */
+/* Define USE_MEMALIGN if `memalign' can be used (i.e. if it can free). */
+
+#define BLOCK_ALIGN 1024
+#define BLOCK_BYTES \
+ (BLOCK_ALIGN - sizeof (struct aligned_block *) - ABLOCKS_PADDING)
+
+/* Internal data structures and constants. */
+
+#ifdef USE_MEMALIGN
+#define IF_MEMALIGN(a,b) a
+#else
+#define IF_MEMALIGN(a,b) b
+#endif
+
+/* Padding to leave at the end of a malloc'd block. This is to give
+ malloc a chance to minimize the amount of memory wasted to alignment.
+ It should be tuned to the particular malloc library used.
+ The current setting is based on glibc-2.3.2. */
+#define ABLOCKS_PADDING IF_MEMALIGN (sizeof (void*), 0)
+#define ABLOCKS_SIZE 16
+
+/* An aligned block of memory. */
+struct ablock
+{
+ union
+ {
+ char payload[BLOCK_BYTES];
+ struct ablock *next_free;
+ } x;
+ /* `abase' is the aligned base of the ablocks. */
+ /* It is overloaded to hold the virtual `busy' field that counts
+ the number of used ablock in the parent ablocks.
+ The first ablock has the `busy' field, the others have the `abase'
+ field. To tell the difference, we assume that pointers will have
+ integer values larger than 2 * ABLOCKS_SIZE. The lowest bit of `busy'
+ is used to tell whether the real base of the parent ablocks is `abase'
+ (if not, the word before the first ablock holds a pointer to the
+ real base). */
+ struct ablocks *abase;
+ /* The padding of all but the last ablock is unused. The padding of
+ the last ablock in an ablocks is not allocated. */
+ char padding[ABLOCKS_PADDING];
+};
+
+/* A bunch of consecutive aligned blocks. */
+struct ablocks
+{
+ struct ablock blocks[ABLOCKS_SIZE];
+};
+
+/* Size of the block requested with malloc or memalign. */
+#define ABLOCKS_BYTES (sizeof (struct ablocks) - ABLOCKS_PADDING)
+
+#define ABLOCK_ABASE(block) \
+ (((unsigned long) (block)->abase) <= (1 + 2 * ABLOCKS_SIZE) \
+ ? (struct ablocks *)(block) \
+ : (block)->abase)
+
+/* Virtual `busy' field. */
+#define ABLOCKS_BUSY(abase) ((abase)->blocks[0].abase)
+
+/* Pointer to the (not necessarily aligned) malloc block. */
+#define ABLOCKS_BASE(abase) \
+ IF_MEMALIGN ((abase), \
+ (1 & (int) ABLOCKS_BUSY (abase) ? abase : ((void**)abase)[-1]))
+
+static struct ablock *free_ablock;
+
+/* Allocate an aligned block of nbytes.
+ Alignment is on a multiple of BLOCK_ALIGN and `nbytes' has to be
+ smaller or equal to BLOCK_BYTES. */
+static POINTER_TYPE *
+lisp_align_malloc (nbytes, type)
+ size_t nbytes;
+ enum mem_type type;
+{
+ void *base, *val;
+ struct ablocks *abase;
+
+ eassert (nbytes <= BLOCK_BYTES);
+
+ BLOCK_INPUT;
+
+#ifdef GC_MALLOC_CHECK
+ allocated_mem_type = type;
+#endif
+
+ if (!free_ablock)
+ {
+ int i, aligned;
+
+#ifdef DOUG_LEA_MALLOC
+ /* Prevent mmap'ing the chunk. Lisp data may not be mmap'ed
+ because mapped region contents are not preserved in
+ a dumped Emacs. */
+ mallopt (M_MMAP_MAX, 0);
+#endif
+
+ if (IF_MEMALIGN (1,0))
+ abase = base = memalign (BLOCK_ALIGN, ABLOCKS_BYTES);
+ else
+ {
+ base = malloc (ABLOCKS_BYTES);
+ abase = ALIGN (base, BLOCK_ALIGN);
+ }
+ aligned = (base == abase);
+ if (!aligned)
+ ((void**)abase)[-1] = base;
+
+#ifdef DOUG_LEA_MALLOC
+ /* Back to a reasonable maximum of mmap'ed areas. */
+ mallopt (M_MMAP_MAX, MMAP_MAX_AREAS);
+#endif
+
+ /* Initialize the blocks and put them on the free list.
+ Is `base' was not properly aligned, we can't use the last block. */
+ for (i = 0; i < (aligned ? ABLOCKS_SIZE : ABLOCKS_SIZE - 1); i++)
+ {
+ abase->blocks[i].abase = abase;
+ abase->blocks[i].x.next_free = free_ablock;
+ free_ablock = &abase->blocks[i];
+ }
+ ABLOCKS_BUSY (abase) = (struct ablocks *) aligned;
+
+ eassert (ABLOCK_ABASE (&abase->blocks[3]) == abase);
+ eassert (ABLOCK_ABASE (&abase->blocks[0]) == abase);
+ eassert (ABLOCKS_BASE (abase) == base);
+ eassert (aligned == (int)ABLOCKS_BUSY (abase));
+ }
+
+ abase = ABLOCK_ABASE (free_ablock);
+ ABLOCKS_BUSY (abase) = (struct ablocks *) (2 + (int) ABLOCKS_BUSY (abase));
+ val = free_ablock;
+ free_ablock = free_ablock->x.next_free;
+
+ /* If the memory just allocated cannot be addressed thru a Lisp
+ object's pointer, and it needs to be,
+ that's equivalent to running out of memory. */
+ if (val && type != MEM_TYPE_NON_LISP)
+ {
+ Lisp_Object tem;
+ XSETCONS (tem, (char *) val + nbytes - 1);
+ if ((char *) XCONS (tem) != (char *) val + nbytes - 1)
+ {
+ lisp_malloc_loser = val;
+ free (val);
+ val = 0;
+ }
+ }
+
+#if GC_MARK_STACK && !defined GC_MALLOC_CHECK
+ if (val && type != MEM_TYPE_NON_LISP)
+ mem_insert (val, (char *) val + nbytes, type);
+#endif
+
+ UNBLOCK_INPUT;
+ if (!val && nbytes)
+ memory_full ();
+
+ eassert (0 == ((EMACS_UINT)val) % BLOCK_ALIGN);
+ return val;
+}
+
+static void
+lisp_align_free (block)
+ POINTER_TYPE *block;
+{
+ struct ablock *ablock = block;
+ struct ablocks *abase = ABLOCK_ABASE (ablock);
+
+#if GC_MARK_STACK && !defined GC_MALLOC_CHECK
+ mem_delete (mem_find (block));
+#endif
+ /* Put on free list. */
+ ablock->x.next_free = free_ablock;
+ free_ablock = ablock;
+ /* Update busy count. */
+ ABLOCKS_BUSY (abase) = (struct ablocks *) (-2 + (int) ABLOCKS_BUSY (abase));
+
+ if (2 > (int) ABLOCKS_BUSY (abase))
+ { /* All the blocks are free. */
+ int i = 0, aligned = (int) ABLOCKS_BUSY (abase);
+ struct ablock **tem = &free_ablock;
+ struct ablock *atop = &abase->blocks[aligned ? ABLOCKS_SIZE :
ABLOCKS_SIZE - 1];
+
+ while (*tem)
+ {
+ if (*tem >= (struct ablock *) abase && *tem < atop)
+ {
+ i++;
+ *tem = (*tem)->x.next_free;
+ }
+ else
+ tem = &(*tem)->x.next_free;
+ }
+ eassert ((aligned & 1) == aligned);
+ eassert (i == (aligned ? ABLOCKS_SIZE : ABLOCKS_SIZE - 1));
+ BLOCK_INPUT;
+ free (ABLOCKS_BASE (abase));
+ UNBLOCK_INPUT;
+ }
+}
/* Return a new buffer structure allocated from the heap with
a call to lisp_malloc. */
@@ -1899,21 +2110,48 @@
/* We store float cells inside of float_blocks, allocating a new
float_block with malloc whenever necessary. Float cells reclaimed
by GC are put on a free list to be reallocated before allocating
- any new float cells from the latest float_block.
-
- Each float_block is just under 1020 bytes long, since malloc really
- allocates in units of powers of two and uses 4 bytes for its own
- overhead. */
+ any new float cells from the latest float_block. */
#define FLOAT_BLOCK_SIZE \
- ((1020 - sizeof (struct float_block *)) / sizeof (struct Lisp_Float))
+ (((BLOCK_BYTES - sizeof (struct float_block *)) * CHAR_BIT) \
+ / (sizeof (struct Lisp_Float) * CHAR_BIT + 1))
+
+#define GETMARKBIT(block,n) \
+ (((block)->markbits[(n) / (sizeof(int) * CHAR_BIT)] \
+ >> ((n) % (sizeof(int) * CHAR_BIT))) \
+ & 1)
+
+#define SETMARKBIT(block,n) \
+ (block)->markbits[(n) / (sizeof(int) * CHAR_BIT)] \
+ |= 1 << ((n) % (sizeof(int) * CHAR_BIT))
+
+#define UNSETMARKBIT(block,n) \
+ (block)->markbits[(n) / (sizeof(int) * CHAR_BIT)] \
+ &= ~(1 << ((n) % (sizeof(int) * CHAR_BIT)))
+
+#define FLOAT_BLOCK(fptr) \
+ ((struct float_block *)(((EMACS_UINT)(fptr)) & ~(BLOCK_ALIGN - 1)))
+
+#define FLOAT_INDEX(fptr) \
+ ((((EMACS_UINT)(fptr)) & (BLOCK_ALIGN - 1)) / sizeof (struct Lisp_Float))
struct float_block
{
- struct float_block *next;
+ /* Place `floats' at the beginning, to ease up FLOAT_INDEX's job. */
struct Lisp_Float floats[FLOAT_BLOCK_SIZE];
+ int markbits[1 + FLOAT_BLOCK_SIZE / (sizeof(int) * CHAR_BIT)];
+ struct float_block *next;
};
+#define FLOAT_MARKED_P(fptr) \
+ GETMARKBIT (FLOAT_BLOCK (fptr), FLOAT_INDEX ((fptr)))
+
+#define FLOAT_MARK(fptr) \
+ SETMARKBIT (FLOAT_BLOCK (fptr), FLOAT_INDEX ((fptr)))
+
+#define FLOAT_UNMARK(fptr) \
+ UNSETMARKBIT (FLOAT_BLOCK (fptr), FLOAT_INDEX ((fptr)))
+
/* Current float_block. */
struct float_block *float_block;
@@ -1936,10 +2174,11 @@
void
init_float ()
{
- float_block = (struct float_block *) lisp_malloc (sizeof *float_block,
+ float_block = (struct float_block *) lisp_align_malloc (sizeof *float_block,
MEM_TYPE_FLOAT);
float_block->next = 0;
bzero ((char *) float_block->floats, sizeof float_block->floats);
+ bzero ((char *) float_block->markbits, sizeof float_block->markbits);
float_block_index = 0;
float_free_list = 0;
n_float_blocks = 1;
@@ -1953,9 +2192,6 @@
struct Lisp_Float *ptr;
{
*(struct Lisp_Float **)&ptr->data = float_free_list;
-#if GC_MARK_STACK
- ptr->type = Vdead;
-#endif
float_free_list = ptr;
}
@@ -1981,7 +2218,7 @@
{
register struct float_block *new;
- new = (struct float_block *) lisp_malloc (sizeof *new,
+ new = (struct float_block *) lisp_align_malloc (sizeof *new,
MEM_TYPE_FLOAT);
new->next = float_block;
float_block = new;
@@ -1992,7 +2229,7 @@
}
XFLOAT_DATA (val) = float_value;
- XSETFASTINT (XFLOAT (val)->type, 0); /* bug chasing -wsr */
+ FLOAT_UNMARK (XFLOAT (val));
consing_since_gc += sizeof (struct Lisp_Float);
floats_consed++;
return val;
@@ -3240,14 +3495,12 @@
struct float_block *b = (struct float_block *) m->start;
int offset = (char *) p - (char *) &b->floats[0];
- /* P must point to the start of a Lisp_Float, not be
- one of the unused cells in the current float block,
- and not be on the free-list. */
+ /* P must point to the start of a Lisp_Float and not be
+ one of the unused cells in the current float block. */
return (offset >= 0
&& offset % sizeof b->floats[0] == 0
&& (b != float_block
- || offset / sizeof b->floats[0] < float_block_index)
- && !EQ (((struct Lisp_Float *) p)->type, Vdead));
+ || offset / sizeof b->floats[0] < float_block_index));
}
else
return 0;
@@ -3394,8 +3646,7 @@
break;
case Lisp_Float:
- mark_p = (live_float_p (m, po)
- && !XMARKBIT (XFLOAT (obj)->type));
+ mark_p = (live_float_p (m, po) && !FLOAT_MARKED_P (XFLOAT (obj)));
break;
case Lisp_Vectorlike:
@@ -3483,8 +3734,7 @@
break;
case MEM_TYPE_FLOAT:
- if (live_float_p (m, p)
- && !XMARKBIT (((struct Lisp_Float *) p)->type))
+ if (live_float_p (m, p) && !FLOAT_MARKED_P (p))
XSETFLOAT (obj, p);
break;
@@ -3812,7 +4062,7 @@
}
again:
- result = (POINTER_TYPE *) ALIGN ((EMACS_UINT)purebeg + pure_bytes_used,
alignment);
+ result = ALIGN (purebeg + pure_bytes_used, alignment);
pure_bytes_used = ((char *)result - (char *)purebeg) + size;
if (pure_bytes_used <= pure_size)
@@ -4825,7 +5032,7 @@
case Lisp_Float:
CHECK_ALLOCATED_AND_LIVE (live_float_p);
- XMARK (XFLOAT (obj)->type);
+ FLOAT_MARK (XFLOAT (obj));
break;
case Lisp_Int:
@@ -4935,7 +5144,7 @@
break;
case Lisp_Float:
- survives_p = XMARKBIT (XFLOAT (obj)->type);
+ survives_p = FLOAT_MARKED_P (XFLOAT (obj));
break;
default:
@@ -5039,19 +5243,16 @@
register int i;
int this_free = 0;
for (i = 0; i < lim; i++)
- if (!XMARKBIT (fblk->floats[i].type))
+ if (!FLOAT_MARKED_P (&fblk->floats[i]))
{
this_free++;
*(struct Lisp_Float **)&fblk->floats[i].data = float_free_list;
float_free_list = &fblk->floats[i];
-#if GC_MARK_STACK
- float_free_list->type = Vdead;
-#endif
}
else
{
num_used++;
- XUNMARK (fblk->floats[i].type);
+ FLOAT_UNMARK (&fblk->floats[i]);
}
lim = FLOAT_BLOCK_SIZE;
/* If this block contains only free floats and we have already
@@ -5062,7 +5263,7 @@
*fprev = fblk->next;
/* Unhook from the free list. */
float_free_list = *(struct Lisp_Float **) &fblk->floats[0].data;
- lisp_free (fblk);
+ lisp_align_free (fblk);
n_float_blocks--;
}
else
@@ -5372,6 +5573,8 @@
pure_size = PURESIZE;
pure_bytes_used = 0;
pure_bytes_used_before_overflow = 0;
+
+ free_ablock = NULL;
#if GC_MARK_STACK || defined GC_MALLOC_CHECK
mem_init ();