/* musl/src/malloc/malloc.c */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#include <errno.h>
#include <sys/mman.h>
#include "libc.h"
#include "atomic.h"
#include "pthread_impl.h"
#if defined(__GNUC__) && defined(__PIC__)
#define inline inline __attribute__((always_inline))
#endif
void *__mmap(void *, size_t, int, int, int, off_t);
int __munmap(void *, size_t);
void *__mremap(void *, size_t, size_t, int, ...);
int __madvise(void *, size_t, int);
struct chunk {
size_t psize, csize;
struct chunk *next, *prev;
};
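/* Each chunk carries two size_t words of bookkeeping immediately
 * before the user data: psize is the size of the chunk below it in
 * memory (doubling as that chunk's footer) and csize is its own
 * size, with the low bit of each serving as the C_INUSE flag. The
 * next/prev links overlap the start of the user data area and are
 * only meaningful while the chunk sits free in a bin. */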
struct bin {
volatile int lock[2];
struct chunk *head;
struct chunk *tail;
};
static struct {
volatile uint64_t binmap;
struct bin bins[64];
volatile int free_lock[2];
} mal;
#define SIZE_ALIGN (4*sizeof(size_t))
#define SIZE_MASK (-SIZE_ALIGN)
#define OVERHEAD (2*sizeof(size_t))
#define MMAP_THRESHOLD (0x1c00*SIZE_ALIGN)
#define DONTCARE 16
#define RECLAIM 163840
#define CHUNK_SIZE(c) ((c)->csize & -2)
#define CHUNK_PSIZE(c) ((c)->psize & -2)
#define PREV_CHUNK(c) ((struct chunk *)((char *)(c) - CHUNK_PSIZE(c)))
#define NEXT_CHUNK(c) ((struct chunk *)((char *)(c) + CHUNK_SIZE(c)))
#define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD)
#define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD)
#define BIN_TO_CHUNK(i) (MEM_TO_CHUNK(&mal.bins[i].head))
#define C_INUSE ((size_t)1)
#define IS_MMAPPED(c) !((c)->csize & (C_INUSE))
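/* SIZE_ALIGN is the allocation granularity and minimum chunk size
 * (32 bytes on a 64-bit target, where OVERHEAD is 16). Adjusted
 * requests above MMAP_THRESHOLD bypass the heap and get their own
 * anonymous mapping; such chunks have C_INUSE clear in csize, and
 * their psize instead records the distance back to the start of
 * the mapping so free() and realloc() can recover the base address
 * and mapping length. */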
/* Synchronization tools */
static inline void lock(volatile int *lk)
{
if (libc.threads_minus_1)
while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
}
static inline void unlock(volatile int *lk)
{
if (lk[0]) {
a_store(lk, 0);
if (lk[1]) __wake(lk, 1, 1);
}
}
static inline void lock_bin(int i)
{
lock(mal.bins[i].lock);
if (!mal.bins[i].head)
mal.bins[i].head = mal.bins[i].tail = BIN_TO_CHUNK(i);
}
static inline void unlock_bin(int i)
{
unlock(mal.bins[i].lock);
}
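/* Each lock is a pair of ints: lk[0] is the lock word acquired with
 * an atomic swap, and lk[1] tracks waiters (maintained by __wait)
 * so unlock() can skip the futex wake when nobody is sleeping.
 * lock() is a no-op while the process is single-threaded, and
 * unlock() only stores/wakes if the lock was actually taken.
 * lock_bin() also lazily initializes the bin's sentinel links the
 * first time that bin is used. */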
static int first_set(uint64_t x)
{
#if 1
return a_ctz_64(x);
#else
static const char debruijn64[64] = {
0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
};
static const char debruijn32[32] = {
0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
};
if (sizeof(long) < 8) {
uint32_t y = x;
if (!y) {
y = x>>32;
return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
}
return debruijn32[(y&-y)*0x076be629 >> 27];
}
return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
#endif
}
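/* first_set() returns the index of the lowest set bit of the bin
 * map; a_ctz_64 is a native count-trailing-zeros, and the disabled
 * branch above is an equivalent portable de Bruijn fallback. */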
static const unsigned char bin_tab[60] = {
32,33,34,35,36,36,37,37,38,38,39,39,
40,40,40,40,41,41,41,41,42,42,42,42,43,43,43,43,
44,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45,
46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,
};
static int bin_index(size_t x)
{
x = x / SIZE_ALIGN - 1;
if (x <= 32) return x;
if (x < 512) return bin_tab[x/8-4];
if (x > 0x1c00) return 63;
return bin_tab[x/128-4] + 16;
}
static int bin_index_up(size_t x)
{
x = x / SIZE_ALIGN - 1;
if (x <= 32) return x;
x--;
if (x < 512) return bin_tab[x/8-4] + 1;
return bin_tab[x/128-4] + 17;
}
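/* Worked example, assuming a 64-bit target (SIZE_ALIGN == 32): a
 * malloc(100) request is adjusted to a 128-byte chunk, so x becomes
 * 128/32 - 1 = 3 and both functions return bin 3; chunk sizes up to
 * 33*SIZE_ALIGN map one-to-one onto bins 0-32. Above that the bins
 * cover ranges: bin_index() gives the bin a free chunk of a given
 * size is stored in, while bin_index_up() gives the first bin whose
 * every member is large enough to satisfy a request of that size.
 * For instance a 4096-byte chunk is stored in bin 39, but a
 * 4096-byte request starts searching at bin 40. The largest sizes
 * all collapse into bin 63. */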
#if 0
void __dump_heap(int x)
{
struct chunk *c;
int i;
for (c = (void *)mal.heap; CHUNK_SIZE(c); c = NEXT_CHUNK(c))
fprintf(stderr, "base %p size %zu (%d) flags %d/%d\n",
c, CHUNK_SIZE(c), bin_index(CHUNK_SIZE(c)),
c->csize & 15,
NEXT_CHUNK(c)->psize & 15);
for (i=0; i<64; i++) {
if (mal.bins[i].head != BIN_TO_CHUNK(i) && mal.bins[i].head) {
fprintf(stderr, "bin %d: %p\n", i, mal.bins[i].head);
if (!(mal.binmap & 1ULL<<i))
fprintf(stderr, "missing from binmap!\n");
} else if (mal.binmap & 1ULL<<i)
fprintf(stderr, "binmap wrongly contains %d!\n", i);
}
}
#endif
void *__expand_heap(size_t *);
static struct chunk *expand_heap(size_t n)
{
static int heap_lock[2];
static void *end;
void *p;
struct chunk *w;
/* The argument n already accounts for the caller's chunk
* overhead needs, but if the heap can't be extended in-place,
* we need room for an extra zero-sized sentinel chunk. */
n += SIZE_ALIGN;
lock(heap_lock);
p = __expand_heap(&n);
if (!p) {
unlock(heap_lock);
return 0;
}
/* If not just expanding existing space, we need to make a
* new sentinel chunk below the allocated space. */
if (p != end) {
/* Valid/safe because of the SIZE_ALIGN added to n at the top
 * of this function. */
n -= SIZE_ALIGN;
p = (char *)p + SIZE_ALIGN;
w = MEM_TO_CHUNK(p);
w->psize = 0 | C_INUSE;
}
/* Record new heap end and fill in footer. */
end = (char *)p + n;
w = MEM_TO_CHUNK(end);
w->psize = n | C_INUSE;
w->csize = 0 | C_INUSE;
/* Fill in header, which may be new or may be replacing a
* zero-size sentinel header at the old end-of-heap. */
w = MEM_TO_CHUNK(p);
w->csize = n | C_INUSE;
unlock(heap_lock);
return w;
}
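/* Both ends of the heap are bounded by zero-size sentinel chunks
 * marked C_INUSE, so coalescing in free() and the merge helpers
 * below never walk past the managed region. expand_heap() returns
 * a fresh chunk already marked in-use covering the new space;
 * malloc() may merge it with a free chunk immediately preceding it
 * and then trims off any excess. */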
static int adjust_size(size_t *n)
{
/* Result of pointer difference must fit in ptrdiff_t. */
if (*n-1 > PTRDIFF_MAX - SIZE_ALIGN - PAGE_SIZE) {
if (*n) {
errno = ENOMEM;
return -1;
} else {
*n = SIZE_ALIGN;
return 0;
}
}
*n = (*n + OVERHEAD + SIZE_ALIGN - 1) & SIZE_MASK;
return 0;
}
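/* Worked examples, assuming a 64-bit target: malloc(1) becomes a
 * 32-byte chunk (request plus 16 bytes of header, rounded up to a
 * multiple of SIZE_ALIGN), malloc(0) is bumped to the minimal
 * SIZE_ALIGN chunk, and sizes so large that a pointer difference
 * could overflow ptrdiff_t fail early with ENOMEM. */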
static void unbin(struct chunk *c, int i)
{
if (c->prev == c->next)
a_and_64(&mal.binmap, ~(1ULL<<i));
c->prev->next = c->next;
c->next->prev = c->prev;
c->csize |= C_INUSE;
NEXT_CHUNK(c)->psize |= C_INUSE;
}
static int alloc_fwd(struct chunk *c)
{
int i;
size_t k;
while (!((k=c->csize) & C_INUSE)) {
i = bin_index(k);
lock_bin(i);
if (c->csize == k) {
unbin(c, i);
unlock_bin(i);
return 1;
}
unlock_bin(i);
}
return 0;
}
static int alloc_rev(struct chunk *c)
{
int i;
size_t k;
while (!((k=c->psize) & C_INUSE)) {
i = bin_index(k);
lock_bin(i);
if (c->psize == k) {
unbin(PREV_CHUNK(c), i);
unlock_bin(i);
return 1;
}
unlock_bin(i);
}
return 0;
}
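/* alloc_fwd(c) tries to take ownership of c itself if it is
 * currently free, and alloc_rev(c) does the same for the chunk
 * preceding c; both return 1 on success. The size word is read
 * without a lock, the bin that size maps to is locked, and the size
 * is re-checked before unbinning; if another thread changed the
 * chunk in the meantime, the loop retries (or gives up once the
 * chunk is seen to be in use). */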
/* pretrim - trims a chunk _prior_ to removing it from its bin.
* Must be called with i as the ideal bin for size n, j the bin
* for the _free_ chunk self, and bin j locked. */
static int pretrim(struct chunk *self, size_t n, int i, int j)
{
size_t n1;
struct chunk *next, *split;
/* We cannot pretrim if it would require re-binning. */
if (j < 40) return 0;
if (j < i+3) {
if (j != 63) return 0;
n1 = CHUNK_SIZE(self);
if (n1-n <= MMAP_THRESHOLD) return 0;
} else {
n1 = CHUNK_SIZE(self);
}
if (bin_index(n1-n) != j) return 0;
next = NEXT_CHUNK(self);
split = (void *)((char *)self + n);
split->prev = self->prev;
split->next = self->next;
split->prev->next = split;
split->next->prev = split;
split->psize = n | C_INUSE;
split->csize = n1-n;
next->psize = n1-n;
self->csize = n | C_INUSE;
return 1;
}
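/* The point of pretrim is to split a large free chunk while it is
 * still linked into its bin: the leading part becomes the
 * allocation and the remainder takes the original chunk's place in
 * the list, so the tail never has to be unbinned and re-freed.
 * Since only bin j is held locked, this is only done when the
 * remainder maps back to the same bin j; when the request is close
 * to the chunk's bin (j < i+3), only the catch-all bin 63 qualifies
 * and the remainder must exceed MMAP_THRESHOLD. Small bins (j < 40)
 * are never pretrimmed. */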
static void trim(struct chunk *self, size_t n)
{
size_t n1 = CHUNK_SIZE(self);
struct chunk *next, *split;
if (n >= n1 - DONTCARE) return;
next = NEXT_CHUNK(self);
split = (void *)((char *)self + n);
split->psize = n | C_INUSE;
split->csize = n1-n | C_INUSE;
next->psize = n1-n | C_INUSE;
self->csize = n | C_INUSE;
free(CHUNK_TO_MEM(split));
}
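/* Allocation strategy: adjusted requests above MMAP_THRESHOLD get a
 * dedicated anonymous mapping. Everything else searches the binmap
 * for the first non-empty bin at or above bin_index_up(n), takes
 * that bin's first chunk (pretrimming it in place when possible),
 * and falls back to expand_heap() when no suitable bin is
 * populated. Any excess beyond the requested size is split off and
 * freed again by trim(). */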
void *malloc(size_t n)
{
struct chunk *c;
int i, j;
if (adjust_size(&n) < 0) return 0;
if (n > MMAP_THRESHOLD) {
size_t len = n + OVERHEAD + PAGE_SIZE - 1 & -PAGE_SIZE;
char *base = __mmap(0, len, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (base == (void *)-1) return 0;
c = (void *)(base + SIZE_ALIGN - OVERHEAD);
c->csize = len - (SIZE_ALIGN - OVERHEAD);
c->psize = SIZE_ALIGN - OVERHEAD;
return CHUNK_TO_MEM(c);
}
i = bin_index_up(n);
for (;;) {
uint64_t mask = mal.binmap & -(1ULL<<i);
if (!mask) {
c = expand_heap(n);
if (!c) return 0;
if (alloc_rev(c)) {
struct chunk *x = c;
c = PREV_CHUNK(c);
NEXT_CHUNK(x)->psize = c->csize =
x->csize + CHUNK_SIZE(c);
}
break;
}
j = first_set(mask);
lock_bin(j);
c = mal.bins[j].head;
if (c != BIN_TO_CHUNK(j)) {
if (!pretrim(c, n, i, j)) unbin(c, j);
unlock_bin(j);
break;
}
unlock_bin(j);
}
/* Now patch up in case we over-allocated */
trim(c, n);
return CHUNK_TO_MEM(c);
}
static size_t mal0_clear(char *p, size_t pagesz, size_t n)
{
#ifdef __GNUC__
typedef uint64_t __attribute__((__may_alias__)) T;
#else
typedef unsigned char T;
#endif
char *pp = p + n;
size_t i = (uintptr_t)pp & (pagesz - 1);
for (;;) {
pp = memset(pp - i, 0, i);
if (pp - p < pagesz) return pp - p;
for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T))
if (((T *)pp)[-1] | ((T *)pp)[-2])
break;
}
}
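/* mal0_clear() supports __malloc0 below: rather than zeroing the
 * whole allocation, it scans it backwards a page at a time and
 * skips writing to pages that are already entirely zero, zeroing
 * only regions where stale data is found; it returns the length of
 * the initial portion the caller must still clear. __malloc0
 * returns mmapped chunks untouched, since new anonymous mappings
 * are already zero-filled. */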
void *__malloc0(size_t n)
{
void *p = malloc(n);
if (!p || IS_MMAPPED(MEM_TO_CHUNK(p)))
return p;
if (n >= PAGE_SIZE)
n = mal0_clear(p, PAGE_SIZE, n);
return memset(p, 0, n);
}
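/* realloc strategy: mmap-serviced chunks are resized with mremap
 * when the page-rounded length actually changes (small targets are
 * instead copied into an ordinary heap chunk). Heap chunks first
 * try to absorb the free chunk that follows them; growing backwards
 * into the preceding chunk is currently disabled (see the FIXME
 * below). If in-place expansion is still not enough, a new chunk is
 * allocated and the contents are copied over. */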
void *realloc(void *p, size_t n)
{
struct chunk *self, *next;
size_t n0, n1;
void *new;
if (!p) return malloc(n);
if (adjust_size(&n) < 0) return 0;
self = MEM_TO_CHUNK(p);
n1 = n0 = CHUNK_SIZE(self);
if (IS_MMAPPED(self)) {
size_t extra = self->psize;
char *base = (char *)self - extra;
size_t oldlen = n0 + extra;
size_t newlen = n + extra;
/* Crash on realloc of freed chunk */
if (extra & 1) a_crash();
if (newlen < PAGE_SIZE && (new = malloc(n))) {
memcpy(new, p, n-OVERHEAD);
free(p);
return new;
}
newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
if (oldlen == newlen) return p;
base = __mremap(base, oldlen, newlen, MREMAP_MAYMOVE);
if (base == (void *)-1)
goto copy_realloc;
self = (void *)(base + extra);
self->csize = newlen - extra;
return CHUNK_TO_MEM(self);
}
next = NEXT_CHUNK(self);
/* Crash on corrupted footer (likely from buffer overflow) */
if (next->psize != self->csize) a_crash();
/* Merge adjacent chunks if we need more space. This is not
* a waste of time even if we fail to get enough space, because our
* subsequent call to free would otherwise have to do the merge. */
if (n > n1 && alloc_fwd(next)) {
n1 += CHUNK_SIZE(next);
next = NEXT_CHUNK(next);
}
/* FIXME: find out what's wrong here and re-enable it. */
if (0 && n > n1 && alloc_rev(self)) {
self = PREV_CHUNK(self);
n1 += CHUNK_SIZE(self);
}
self->csize = n1 | C_INUSE;
next->psize = n1 | C_INUSE;
/* If we got enough space, split off the excess and return */
if (n <= n1) {
//memmove(CHUNK_TO_MEM(self), p, n0-OVERHEAD);
trim(self, n);
return CHUNK_TO_MEM(self);
}
copy_realloc:
/* As a last resort, allocate a new chunk and copy to it. */
new = malloc(n-OVERHEAD);
if (!new) return 0;
memcpy(new, p, n0-OVERHEAD);
free(CHUNK_TO_MEM(self));
return new;
}
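/* free() coalesces the chunk with any free neighbors before binning
 * it. The retry loop below writes the provisionally merged size,
 * takes the destination bin's lock and then free_lock, and
 * re-checks that both neighbors are still in use; if one was freed
 * in the meantime, the locks are dropped, the newly free neighbor
 * is absorbed via alloc_rev/alloc_fwd, and the process repeats.
 * Reclaim is flagged when absorbing a neighbor yields a combined
 * size over RECLAIM whose highest set bit is above that of the
 * neighbor alone, presumably so that repeatedly freeing small
 * chunks into an already-huge free region does not madvise it over
 * and over; the interior pages of such a chunk are returned to the
 * kernel with MADV_DONTNEED once it is binned. */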
void free(void *p)
{
struct chunk *self, *next;
size_t final_size, new_size, size;
int reclaim=0;
int i;
if (!p) return;
self = MEM_TO_CHUNK(p);
if (IS_MMAPPED(self)) {
size_t extra = self->psize;
char *base = (char *)self - extra;
size_t len = CHUNK_SIZE(self) + extra;
/* Crash on double free */
if (extra & 1) a_crash();
__munmap(base, len);
return;
}
final_size = new_size = CHUNK_SIZE(self);
next = NEXT_CHUNK(self);
/* Crash on corrupted footer (likely from buffer overflow) */
if (next->psize != self->csize) a_crash();
for (;;) {
if (self->psize & next->csize & C_INUSE) {
self->csize = final_size | C_INUSE;
next->psize = final_size | C_INUSE;
i = bin_index(final_size);
lock_bin(i);
lock(mal.free_lock);
if (self->psize & next->csize & C_INUSE)
break;
unlock(mal.free_lock);
unlock_bin(i);
}
if (alloc_rev(self)) {
self = PREV_CHUNK(self);
size = CHUNK_SIZE(self);
final_size += size;
if (new_size+size > RECLAIM && (new_size+size^size) > size)
reclaim = 1;
}
if (alloc_fwd(next)) {
size = CHUNK_SIZE(next);
final_size += size;
if (new_size+size > RECLAIM && (new_size+size^size) > size)
reclaim = 1;
next = NEXT_CHUNK(next);
}
}
if (!(mal.binmap & 1ULL<<i))
a_or_64(&mal.binmap, 1ULL<<i);
self->csize = final_size;
next->psize = final_size;
unlock(mal.free_lock);
self->next = BIN_TO_CHUNK(i);
self->prev = mal.bins[i].tail;
self->next->prev = self;
self->prev->next = self;
/* Replace middle of large chunks with fresh zero pages */
if (reclaim) {
uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE;
uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE;
#if 1
__madvise((void *)a, b-a, MADV_DONTNEED);
#else
__mmap((void *)a, b-a, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
#endif
}
unlock_bin(i);
}