From 05755f5c40397f183146aa12072aea7a7e2fdb04 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 Feb 2012 15:20:05 +0100 Subject: [PATCH 01/10] Initial version of c->reply_bytes implementation backported from unstable to 2.4, in order to apply issue 327 patches. --- src/aof.c | 1 + src/networking.c | 26 ++++++++++++++++++++++++++ src/redis.h | 1 + 3 files changed, 28 insertions(+) diff --git a/src/aof.c b/src/aof.c index a8039fb1..f4c17660 100644 --- a/src/aof.c +++ b/src/aof.c @@ -261,6 +261,7 @@ struct redisClient *createFakeClient(void) { * so that Redis will not try to send replies to this client. */ c->replstate = REDIS_REPL_WAIT_BGSAVE_START; c->reply = listCreate(); + c->reply_bytes = 0; c->watched_keys = listCreate(); listSetFreeMethod(c->reply,decrRefCount); listSetDupMethod(c->reply,dupClientReplyValue); diff --git a/src/networking.c b/src/networking.c index b48927df..941d7882 100644 --- a/src/networking.c +++ b/src/networking.c @@ -41,6 +41,7 @@ redisClient *createClient(int fd) { c->authenticated = 0; c->replstate = REDIS_REPL_NONE; c->reply = listCreate(); + c->reply_bytes = 0; listSetFreeMethod(c->reply,decrRefCount); listSetDupMethod(c->reply,dupClientReplyValue); c->bpop.keys = NULL; @@ -130,6 +131,7 @@ void _addReplyObjectToList(redisClient *c, robj *o) { listAddNodeTail(c->reply,o); } } + c->reply_bytes += sdslen(o->ptr); } /* This method takes responsibility over the sds. When it is no longer @@ -142,6 +144,7 @@ void _addReplySdsToList(redisClient *c, sds s) { return; } + c->reply_bytes += sdslen(s); if (listLength(c->reply) == 0) { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); } else { @@ -180,6 +183,7 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { listAddNodeTail(c->reply,createStringObject(s,len)); } } + c->reply_bytes += len; } /* ----------------------------------------------------------------------------- @@ -291,6 +295,7 @@ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { len = listNodeValue(ln); len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length); + c->reply_bytes += sdslen(len->ptr); if (ln->next != NULL) { next = listNodeValue(ln->next); @@ -398,6 +403,7 @@ void copyClientOutputBuffer(redisClient *dst, redisClient *src) { dst->reply = listDup(src->reply); memcpy(dst->buf,src->buf,src->bufpos); dst->bufpos = src->bufpos; + dst->reply_bytes = src->reply_bytes; } static void acceptCommonHandler(int fd) { @@ -611,6 +617,7 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { if (c->sentlen == objlen) { listDelNode(c->reply,listFirst(c->reply)); c->sentlen = 0; + c->reply_bytes -= objlen; } } /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT @@ -1060,3 +1067,22 @@ void rewriteClientCommandVector(redisClient *c, int argc, ...) { redisAssert(c->cmd != NULL); va_end(ap); } + +/* This function returns the number of bytes that Redis is virtually + * using to store the reply still not read by the client. + * It is "virtual" since the reply output list may contain objects that + * are shared and are not really using additional memory. + * + * The function returns the total sum of the length of all the objects + * stored in the output list, plus the memory used to allocate every + * list node. The static reply buffer is not taken into account since it + * is allocated anyway. + * + * Note: this function is very fast so can be called as many time as + * the caller wishes. The main usage of this function currently is + * enforcing the client output length limits. */ +unsigned long getClientOutputBufferMemoryUsage(redisClient *c) { + unsigned long list_item_size = sizeof(listNode); + + return c->reply_bytes + (list_item_size*listLength(c->reply)); +} diff --git a/src/redis.h b/src/redis.h index 0d089507..6b1b604d 100644 --- a/src/redis.h +++ b/src/redis.h @@ -339,6 +339,7 @@ typedef struct redisClient { int multibulklen; /* number of multi bulk arguments left to read */ long bulklen; /* length of bulk argument in multi bulk request */ list *reply; + unsigned long reply_bytes; /* Tot bytes of objects in reply list */ int sentlen; time_t lastinteraction; /* time of the last interaction, used for timeout */ int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ From c63e1b83c40ab6dfb444914b1a18ecce8c003448 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 4 Feb 2012 14:05:54 +0100 Subject: [PATCH 02/10] This fixes issue #327, is a very complex fix (unfortunately), details: 1) sendReplyToClient() now no longer stops transferring data to a single client in the case we are out of memory (maxmemory-wise). 2) in processCommand() the idea of we being out of memory is no longer the naive zmalloc_used_memory() > server.maxmemory. To say if we can accept or not write queries is up to the return value of freeMemoryIfNeeded(), that has full control about that. 3) freeMemoryIfNeeded() now does its math without considering output buffers size. But at the same time it can't let the output buffers to put us too much outside the max memory limit, so at the same time it makes sure there is enough effort into delivering the output buffers to the slaves, calling the write handler directly. This three changes are the result of many tests, I found (partially empirically) that is the best way to address the problem, but maybe we'll find better solutions in the future. --- src/networking.c | 11 +++-- src/redis.c | 115 ++++++++++++++++++++++++++++++++++++++--------- src/redis.h | 2 +- 3 files changed, 103 insertions(+), 25 deletions(-) diff --git a/src/networking.c b/src/networking.c index 941d7882..156e1996 100644 --- a/src/networking.c +++ b/src/networking.c @@ -620,12 +620,17 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { c->reply_bytes -= objlen; } } - /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT + /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT * bytes, in a single threaded server it's a good idea to serve * other clients as well, even if a very large request comes from * super fast link that is always able to accept data (in real world - * scenario think about 'KEYS *' against the loopback interfae) */ - if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break; + * scenario think about 'KEYS *' against the loopback interface). + * + * However if we are over the maxmemory limit we ignore that and + * just deliver as much data as it is possible to deliver. */ + if (totwritten > REDIS_MAX_WRITE_PER_EVENT && + (server.maxmemory == 0 || + zmalloc_used_memory() < server.maxmemory)) break; } if (nwritten == -1) { if (errno == EAGAIN) { diff --git a/src/redis.c b/src/redis.c index c8904e84..697d6c47 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1096,12 +1096,13 @@ int processCommand(redisClient *c) { * First we try to free some memory if possible (if there are volatile * keys in the dataset). If there are not the only thing we can do * is returning an error. */ - if (server.maxmemory) freeMemoryIfNeeded(); - if (server.maxmemory && (c->cmd->flags & REDIS_CMD_DENYOOM) && - zmalloc_used_memory() > server.maxmemory) - { - addReplyError(c,"command not allowed when used memory > 'maxmemory'"); - return REDIS_OK; + if (server.maxmemory) { + int retval = freeMemoryIfNeeded(); + if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) { + addReplyError(c, + "command not allowed when used memory > 'maxmemory'"); + return REDIS_OK; + } } /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ @@ -1528,23 +1529,54 @@ void monitorCommand(redisClient *c) { /* ============================ Maxmemory directive ======================== */ /* This function gets called when 'maxmemory' is set on the config file to limit - * the max memory used by the server, and we are out of memory. - * This function will try to, in order: + * the max memory used by the server, before processing a command. * - * - Free objects from the free list - * - Try to remove keys with an EXPIRE set + * The goal of the function is to free enough memory to keep Redis under the + * configured memory limit. * - * It is not possible to free enough memory to reach used-memory < maxmemory - * the server will start refusing commands that will enlarge even more the - * memory usage. + * The function starts calculating how many bytes should be freed to keep + * Redis under the limit, and enters a loop selecting the best keys to + * evict accordingly to the configured policy. + * + * If all the bytes needed to return back under the limit were freed the + * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller + * should block the execution of commands that will result in more memory + * used by the server. */ -void freeMemoryIfNeeded(void) { - /* Remove keys accordingly to the active policy as long as we are - * over the memory limit. */ - if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION) return; +int freeMemoryIfNeeded(void) { + size_t mem_used, mem_tofree, mem_freed; + int slaves = listLength(server.slaves); - while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) { - int j, k, freed = 0; + /* Remove the size of slaves output buffers from the count of used + * memory. */ + mem_used = zmalloc_used_memory(); + if (slaves) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + redisClient *slave = listNodeValue(ln); + unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave); + if (obuf_bytes > mem_used) + mem_used = 0; + else + mem_used -= obuf_bytes; + } + } + + /* Check if we are over the memory limit. */ + if (mem_used <= server.maxmemory) return REDIS_OK; + + if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION) + return REDIS_ERR; /* We need to free memory, but policy forbids. */ + + /* Compute how much memory we need to free. */ + mem_tofree = mem_used - server.maxmemory; + printf("USED: %zu, TOFREE: %zu\n", mem_used, mem_tofree); + mem_freed = 0; + while (mem_freed < mem_tofree) { + int j, k, keys_freed = 0; for (j = 0; j < server.dbnum; j++) { long bestval = 0; /* just to prevent warning */ @@ -1617,16 +1649,57 @@ void freeMemoryIfNeeded(void) { /* Finally remove the selected key. */ if (bestkey) { + long long delta; + robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); propagateExpire(db,keyobj); + /* We compute the amount of memory freed by dbDelete() alone. + * It is possible that actually the memory needed to propagate + * the DEL in AOF and replication link is greater than the one + * we are freeing removing the key, but we can't account for + * that otherwise we would never exit the loop. + * + * AOF and Output buffer memory will be freed eventually so + * we only care about memory used by the key space. */ + delta = (long long) zmalloc_used_memory(); dbDelete(db,keyobj); + delta -= (long long) zmalloc_used_memory(); + // printf("%lld\n",delta); + mem_freed += delta; server.stat_evictedkeys++; decrRefCount(keyobj); - freed++; + keys_freed++; + + /* When the memory to free starts to be big enough, we may + * start spending so much time here that is impossible to + * deliver data to the slaves fast enough, so we force the + * transmission here inside the loop. */ + if (slaves) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + redisClient *slave = listNodeValue(ln); + int events; + + events = aeGetFileEvents(server.el,slave->fd); + printf("EVENTS: %d\n", events); + if (events & AE_WRITABLE && + slave->replstate == REDIS_REPL_ONLINE && + listLength(slave->reply)) + { + printf("SLAVE %d -> %d\n", + slave->fd, (int) listLength(slave->reply)); + sendReplyToClient(server.el,slave->fd,slave,0); + } + } + } } } - if (!freed) return; /* nothing to free... */ + if (!keys_freed) return REDIS_ERR; /* nothing to free... */ } + return REDIS_OK; } /* =================================== Main! ================================ */ diff --git a/src/redis.h b/src/redis.h index 6b1b604d..e14a7908 100644 --- a/src/redis.h +++ b/src/redis.h @@ -839,7 +839,7 @@ unsigned int zsetLength(robj *zobj); void zsetConvert(robj *zobj, int encoding); /* Core functions */ -void freeMemoryIfNeeded(void); +int freeMemoryIfNeeded(void); int processCommand(redisClient *c); void setupSignalHandlers(void); struct redisCommand *lookupCommand(sds name); From 5a7999e66aa76bbe8a71ede5738e65df2d5a8955 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 6 Feb 2012 16:35:43 +0100 Subject: [PATCH 03/10] Also remove size of AOF buffers from used memory when doing the math for freeMemoryIfNeeded() --- src/redis.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index 697d6c47..f73f3155 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1547,8 +1547,8 @@ int freeMemoryIfNeeded(void) { size_t mem_used, mem_tofree, mem_freed; int slaves = listLength(server.slaves); - /* Remove the size of slaves output buffers from the count of used - * memory. */ + /* Remove the size of slaves output buffers and AOF buffer from the + * count of used memory. */ mem_used = zmalloc_used_memory(); if (slaves) { listIter li; @@ -1564,6 +1564,10 @@ int freeMemoryIfNeeded(void) { mem_used -= obuf_bytes; } } + if (server.aof_state != REDIS_AOF_OFF) { + mem_used -= sdslen(server.aof_buf); + mem_used -= sdslen(server.aof_rewrite_buf); + } /* Check if we are over the memory limit. */ if (mem_used <= server.maxmemory) return REDIS_OK; From 35a4761f1d4c21fc21ad55694f86c58289bb817a Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 6 Feb 2012 16:56:42 +0100 Subject: [PATCH 04/10] freeMemoryIfNeeded() minor refactoring --- src/networking.c | 21 +++++++++++++++++++++ src/redis.c | 24 +----------------------- src/redis.h | 1 + 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/networking.c b/src/networking.c index 156e1996..59f18abb 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1091,3 +1091,24 @@ unsigned long getClientOutputBufferMemoryUsage(redisClient *c) { return c->reply_bytes + (list_item_size*listLength(c->reply)); } + +/* Helper function used by freeMemoryIfNeeded() in order to flush slaves + * output buffers without returning control to the event loop. */ +void flushSlavesOutputBuffers(void) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + redisClient *slave = listNodeValue(ln); + int events; + + events = aeGetFileEvents(server.el,slave->fd); + if (events & AE_WRITABLE && + slave->replstate == REDIS_REPL_ONLINE && + listLength(slave->reply)) + { + sendReplyToClient(server.el,slave->fd,slave,0); + } + } +} diff --git a/src/redis.c b/src/redis.c index f73f3155..4f6de4ff 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1577,7 +1577,6 @@ int freeMemoryIfNeeded(void) { /* Compute how much memory we need to free. */ mem_tofree = mem_used - server.maxmemory; - printf("USED: %zu, TOFREE: %zu\n", mem_used, mem_tofree); mem_freed = 0; while (mem_freed < mem_tofree) { int j, k, keys_freed = 0; @@ -1668,7 +1667,6 @@ int freeMemoryIfNeeded(void) { delta = (long long) zmalloc_used_memory(); dbDelete(db,keyobj); delta -= (long long) zmalloc_used_memory(); - // printf("%lld\n",delta); mem_freed += delta; server.stat_evictedkeys++; decrRefCount(keyobj); @@ -1678,27 +1676,7 @@ int freeMemoryIfNeeded(void) { * start spending so much time here that is impossible to * deliver data to the slaves fast enough, so we force the * transmission here inside the loop. */ - if (slaves) { - listIter li; - listNode *ln; - - listRewind(server.slaves,&li); - while((ln = listNext(&li))) { - redisClient *slave = listNodeValue(ln); - int events; - - events = aeGetFileEvents(server.el,slave->fd); - printf("EVENTS: %d\n", events); - if (events & AE_WRITABLE && - slave->replstate == REDIS_REPL_ONLINE && - listLength(slave->reply)) - { - printf("SLAVE %d -> %d\n", - slave->fd, (int) listLength(slave->reply)); - sendReplyToClient(server.el,slave->fd,slave,0); - } - } - } + if (slaves) flushSlavesOutputBuffers(); } } if (!keys_freed) return REDIS_ERR; /* nothing to free... */ diff --git a/src/redis.h b/src/redis.h index e14a7908..3b372636 100644 --- a/src/redis.h +++ b/src/redis.h @@ -716,6 +716,7 @@ void getClientsMaxBuffers(unsigned long *longest_output_list, sds getClientInfoString(redisClient *client); sds getAllClientsInfoString(void); void rewriteClientCommandVector(redisClient *c, int argc, ...); +void flushSlavesOutputBuffers(void); #ifdef __GNUC__ void addReplyErrorFormat(redisClient *c, const char *fmt, ...) From 8b1829009b3c0710b3a8b889b64cd0bb8c3f46f1 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 7 Feb 2012 13:05:36 +0100 Subject: [PATCH 05/10] Precision of getClientOutputBufferMemoryUsage() greatily improved, see issue #327 for more information. --- src/networking.c | 25 +++++++++++++++++-------- src/zmalloc.c | 14 ++++++++++++++ src/zmalloc.h | 4 ++++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/networking.c b/src/networking.c index 59f18abb..008b2a25 100644 --- a/src/networking.c +++ b/src/networking.c @@ -131,7 +131,7 @@ void _addReplyObjectToList(redisClient *c, robj *o) { listAddNodeTail(c->reply,o); } } - c->reply_bytes += sdslen(o->ptr); + c->reply_bytes += zmalloc_size(o->ptr); } /* This method takes responsibility over the sds. When it is no longer @@ -144,7 +144,7 @@ void _addReplySdsToList(redisClient *c, sds s) { return; } - c->reply_bytes += sdslen(s); + c->reply_bytes += zmalloc_size(s); if (listLength(c->reply) == 0) { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); } else { @@ -169,7 +169,10 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { if (c->flags & REDIS_CLOSE_AFTER_REPLY) return; if (listLength(c->reply) == 0) { - listAddNodeTail(c->reply,createStringObject(s,len)); + robj *o = createStringObject(s,len); + + listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size(o->ptr); } else { tail = listNodeValue(listLast(c->reply)); @@ -177,13 +180,17 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { if (tail->ptr != NULL && sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES) { + c->reply_bytes -= zmalloc_size(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,len); + c->reply_bytes += zmalloc_size(tail->ptr); } else { - listAddNodeTail(c->reply,createStringObject(s,len)); + robj *o = createStringObject(s,len); + + listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size(o->ptr); } } - c->reply_bytes += len; } /* ----------------------------------------------------------------------------- @@ -295,7 +302,7 @@ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { len = listNodeValue(ln); len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length); - c->reply_bytes += sdslen(len->ptr); + c->reply_bytes += zmalloc_size(len->ptr); if (ln->next != NULL) { next = listNodeValue(ln->next); @@ -572,6 +579,7 @@ void freeClient(redisClient *c) { void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { redisClient *c = privdata; int nwritten = 0, totwritten = 0, objlen; + size_t objmem; robj *o; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); @@ -597,6 +605,7 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { } else { o = listNodeValue(listFirst(c->reply)); objlen = sdslen(o->ptr); + objmem = zmalloc_size(o->ptr); if (objlen == 0) { listDelNode(c->reply,listFirst(c->reply)); @@ -617,7 +626,7 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { if (c->sentlen == objlen) { listDelNode(c->reply,listFirst(c->reply)); c->sentlen = 0; - c->reply_bytes -= objlen; + c->reply_bytes -= objmem; } } /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT @@ -1087,7 +1096,7 @@ void rewriteClientCommandVector(redisClient *c, int argc, ...) { * the caller wishes. The main usage of this function currently is * enforcing the client output length limits. */ unsigned long getClientOutputBufferMemoryUsage(redisClient *c) { - unsigned long list_item_size = sizeof(listNode); + unsigned long list_item_size = sizeof(listNode)+sizeof(robj); return c->reply_bytes + (list_item_size*listLength(c->reply)); } diff --git a/src/zmalloc.c b/src/zmalloc.c index 56b9140c..89f80d83 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -150,6 +150,20 @@ void *zrealloc(void *ptr, size_t size) { #endif } +/* Provide zmalloc_size() for systems where this function is not provided by + * malloc itself, given that in that case we store an header with this + * information as the first bytes of every allocation. */ +#ifndef HAVE_MALLOC_SIZE +size_t zmalloc_size(void *ptr) { + void *realptr = (char*)ptr-PREFIX_SIZE; + size_t size = *((size_t*)realptr); + /* Assume at least that all the allocations are padded at sizeof(long) by + * the underlying allocator. */ + if (size&(sizeof(long)-1)) size += sizeof(long)-(size&(sizeof(long)-1)); + return size+PREFIX_SIZE; +} +#endif + void zfree(void *ptr) { #ifndef HAVE_MALLOC_SIZE void *realptr; diff --git a/src/zmalloc.h b/src/zmalloc.h index 7ee556a3..995814c8 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -76,4 +76,8 @@ void zmalloc_enable_thread_safeness(void); float zmalloc_get_fragmentation_ratio(void); size_t zmalloc_get_rss(void); +#ifndef HAVE_MALLOC_SIZE +size_t zmalloc_size(void *ptr); +#endif + #endif /* __ZMALLOC_H */ From 4de73b7e0f6d199406bf35456815aa7b3bc98fd8 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 7 Feb 2012 17:41:31 +0100 Subject: [PATCH 06/10] Fixes to c->reply_bytes computation, and debug messages to closely study the behavior of memory pressure + slaves + maxmemory + blocked slaves. --- src/networking.c | 30 ++++++++++++++++++++++-------- src/redis.c | 16 +++++++++++++++- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/networking.c b/src/networking.c index 008b2a25..8af2a41c 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3,6 +3,14 @@ static void setProtocolError(redisClient *c, int pos); +/* To evaluate the output buffer size of a client we need to get size of + * allocated objects, however we can't used zmalloc_size() directly on sds + * strings because of the trick they use to work (the header is before the + * returned pointer), so we use this helper function. */ +size_t zmalloc_size_sds(sds s) { + return zmalloc_size(s-sizeof(struct sdshdr)); +} + void *dupClientReplyValue(void *o) { incrRefCount((robj*)o); return o; @@ -117,6 +125,7 @@ void _addReplyObjectToList(redisClient *c, robj *o) { if (listLength(c->reply) == 0) { incrRefCount(o); listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } else { tail = listNodeValue(listLast(c->reply)); @@ -124,14 +133,16 @@ void _addReplyObjectToList(redisClient *c, robj *o) { if (tail->ptr != NULL && sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES) { + c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr)); + c->reply_bytes += zmalloc_size_sds(tail->ptr); } else { incrRefCount(o); listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } } - c->reply_bytes += zmalloc_size(o->ptr); } /* This method takes responsibility over the sds. When it is no longer @@ -144,9 +155,9 @@ void _addReplySdsToList(redisClient *c, sds s) { return; } - c->reply_bytes += zmalloc_size(s); if (listLength(c->reply) == 0) { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); + c->reply_bytes += zmalloc_size_sds(s); } else { tail = listNodeValue(listLast(c->reply)); @@ -154,11 +165,14 @@ void _addReplySdsToList(redisClient *c, sds s) { if (tail->ptr != NULL && sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES) { + c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,sdslen(s)); + c->reply_bytes += zmalloc_size_sds(tail->ptr); sdsfree(s); } else { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); + c->reply_bytes += zmalloc_size_sds(s); } } } @@ -172,7 +186,7 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { robj *o = createStringObject(s,len); listAddNodeTail(c->reply,o); - c->reply_bytes += zmalloc_size(o->ptr); + c->reply_bytes += zmalloc_size_sds(o->ptr); } else { tail = listNodeValue(listLast(c->reply)); @@ -180,15 +194,15 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { if (tail->ptr != NULL && sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES) { - c->reply_bytes -= zmalloc_size(tail->ptr); + c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,len); - c->reply_bytes += zmalloc_size(tail->ptr); + c->reply_bytes += zmalloc_size_sds(tail->ptr); } else { robj *o = createStringObject(s,len); listAddNodeTail(c->reply,o); - c->reply_bytes += zmalloc_size(o->ptr); + c->reply_bytes += zmalloc_size_sds(o->ptr); } } } @@ -302,7 +316,7 @@ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { len = listNodeValue(ln); len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length); - c->reply_bytes += zmalloc_size(len->ptr); + c->reply_bytes += zmalloc_size_sds(len->ptr); if (ln->next != NULL) { next = listNodeValue(ln->next); @@ -605,7 +619,7 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { } else { o = listNodeValue(listFirst(c->reply)); objlen = sdslen(o->ptr); - objmem = zmalloc_size(o->ptr); + objmem = zmalloc_size_sds(o->ptr); if (objlen == 0) { listDelNode(c->reply,listFirst(c->reply)); diff --git a/src/redis.c b/src/redis.c index 4f6de4ff..822f9c8a 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1546,10 +1546,18 @@ void monitorCommand(redisClient *c) { int freeMemoryIfNeeded(void) { size_t mem_used, mem_tofree, mem_freed; int slaves = listLength(server.slaves); + static time_t xt; + int debug = 0; + + if (xt != time(NULL)) { + debug = 1; + xt = time(NULL); + } /* Remove the size of slaves output buffers and AOF buffer from the * count of used memory. */ mem_used = zmalloc_used_memory(); + if (debug) printf("used_full: %zu\n", mem_used); if (slaves) { listIter li; listNode *ln; @@ -1564,6 +1572,7 @@ int freeMemoryIfNeeded(void) { mem_used -= obuf_bytes; } } + if (debug) printf("used_nosl: %zu\n", mem_used); if (server.aof_state != REDIS_AOF_OFF) { mem_used -= sdslen(server.aof_buf); mem_used -= sdslen(server.aof_rewrite_buf); @@ -1578,6 +1587,7 @@ int freeMemoryIfNeeded(void) { /* Compute how much memory we need to free. */ mem_tofree = mem_used - server.maxmemory; mem_freed = 0; + if (debug) printf("tofree: %zu\n", mem_tofree); while (mem_freed < mem_tofree) { int j, k, keys_freed = 0; @@ -1679,8 +1689,12 @@ int freeMemoryIfNeeded(void) { if (slaves) flushSlavesOutputBuffers(); } } - if (!keys_freed) return REDIS_ERR; /* nothing to free... */ + if (!keys_freed) { + if (debug) printf("-freed: %zu\n\n", mem_freed); + return REDIS_ERR; /* nothing to free... */ + } } + if (debug) printf("+freed: %zu\n\n", mem_freed); return REDIS_OK; } From f27d38862df1acecb7681d21b007affaf78534d6 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 8 Feb 2012 00:10:20 +0100 Subject: [PATCH 07/10] debugging messages removed from freeMemoryIfNeeded() --- src/redis.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/redis.c b/src/redis.c index 822f9c8a..4f6de4ff 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1546,18 +1546,10 @@ void monitorCommand(redisClient *c) { int freeMemoryIfNeeded(void) { size_t mem_used, mem_tofree, mem_freed; int slaves = listLength(server.slaves); - static time_t xt; - int debug = 0; - - if (xt != time(NULL)) { - debug = 1; - xt = time(NULL); - } /* Remove the size of slaves output buffers and AOF buffer from the * count of used memory. */ mem_used = zmalloc_used_memory(); - if (debug) printf("used_full: %zu\n", mem_used); if (slaves) { listIter li; listNode *ln; @@ -1572,7 +1564,6 @@ int freeMemoryIfNeeded(void) { mem_used -= obuf_bytes; } } - if (debug) printf("used_nosl: %zu\n", mem_used); if (server.aof_state != REDIS_AOF_OFF) { mem_used -= sdslen(server.aof_buf); mem_used -= sdslen(server.aof_rewrite_buf); @@ -1587,7 +1578,6 @@ int freeMemoryIfNeeded(void) { /* Compute how much memory we need to free. */ mem_tofree = mem_used - server.maxmemory; mem_freed = 0; - if (debug) printf("tofree: %zu\n", mem_tofree); while (mem_freed < mem_tofree) { int j, k, keys_freed = 0; @@ -1689,12 +1679,8 @@ int freeMemoryIfNeeded(void) { if (slaves) flushSlavesOutputBuffers(); } } - if (!keys_freed) { - if (debug) printf("-freed: %zu\n\n", mem_freed); - return REDIS_ERR; /* nothing to free... */ - } + if (!keys_freed) return REDIS_ERR; /* nothing to free... */ } - if (debug) printf("+freed: %zu\n\n", mem_freed); return REDIS_OK; } From 662f25e8053bb111b21000d813da34f3eb333df7 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 8 Feb 2012 00:17:27 +0100 Subject: [PATCH 08/10] redis.conf updated with new maxmemory semantics --- redis.conf | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/redis.conf b/redis.conf index 44fb536c..2e434000 100644 --- a/redis.conf +++ b/redis.conf @@ -192,21 +192,23 @@ slave-serve-stale-data yes # maxclients 128 # Don't use more memory than the specified amount of bytes. -# When the memory limit is reached Redis will try to remove keys with an -# EXPIRE set. It will try to start freeing keys that are going to expire -# in little time and preserve keys with a longer time to live. -# Redis will also try to remove objects from free lists if possible. +# When the memory limit is reached Redis will try to remove keys +# accordingly to the eviction policy selected (see maxmemmory-policy). # -# If all this fails, Redis will start to reply with errors to commands -# that will use more memory, like SET, LPUSH, and so on, and will continue -# to reply to most read-only commands like GET. +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. # -# WARNING: maxmemory can be a good idea mainly if you want to use Redis as a -# 'state' server or cache, not as a real DB. When Redis is used as a real -# database the memory usage will grow over the weeks, it will be obvious if -# it is going to use too much memory in the long run, and you'll have the time -# to upgrade. With maxmemory after the limit is reached you'll start to get -# errors for write operations, and this may even lead to DB inconsistency. +# This option is usually useful when using Redis as an LRU cache, or to set +# an hard memory limit for an instance (using the 'noeviction' policy). +# +# WARNING: If you have slaves attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the slaves are subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of slaves is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. # # maxmemory From 31f9f987ace301017b164964f95eb153ef8c74d5 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 8 Feb 2012 00:20:46 +0100 Subject: [PATCH 09/10] more practical maxmemory+slaves hint in redis.conf --- redis.conf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/redis.conf b/redis.conf index 2e434000..58ea6fcc 100644 --- a/redis.conf +++ b/redis.conf @@ -210,6 +210,10 @@ slave-serve-stale-data yes # buffer of slaves is full with DELs of keys evicted triggering the deletion # of more keys, and so forth until the database is completely emptied. # +# In short... if you have slaves attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for slave +# output buffers (but this is not needed if the policy is 'noeviction'). +# # maxmemory # MAXMEMORY POLICY: how Redis will select what to remove when maxmemory From b3a86b8209be03ea954dd13cf97a743c1f45796f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 Feb 2012 15:30:29 +0100 Subject: [PATCH 10/10] Fixed a few broken stuff introduced while merging issue #327 related code in 2.4 --- src/redis.c | 6 +++--- src/redis.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/redis.c b/src/redis.c index 4f6de4ff..29ab6dd4 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1564,9 +1564,9 @@ int freeMemoryIfNeeded(void) { mem_used -= obuf_bytes; } } - if (server.aof_state != REDIS_AOF_OFF) { - mem_used -= sdslen(server.aof_buf); - mem_used -= sdslen(server.aof_rewrite_buf); + if (server.appendonly) { + mem_used -= sdslen(server.aofbuf); + mem_used -= sdslen(server.bgrewritebuf); } /* Check if we are over the memory limit. */ diff --git a/src/redis.h b/src/redis.h index 3b372636..e93916c3 100644 --- a/src/redis.h +++ b/src/redis.h @@ -716,6 +716,7 @@ void getClientsMaxBuffers(unsigned long *longest_output_list, sds getClientInfoString(redisClient *client); sds getAllClientsInfoString(void); void rewriteClientCommandVector(redisClient *c, int argc, ...); +unsigned long getClientOutputBufferMemoryUsage(redisClient *c); void flushSlavesOutputBuffers(void); #ifdef __GNUC__