Merge branch 'issue327' into unstable

This commit is contained in:
antirez 2012-02-16 09:40:27 +01:00
commit 48a32944e6
8 changed files with 209 additions and 59 deletions

View File

@ -201,21 +201,27 @@ slave-serve-stale-data yes
# maxclients 10000
# Don't use more memory than the specified amount of bytes.
# When the memory limit is reached Redis will try to remove keys with an
# EXPIRE set. It will try to start freeing keys that are going to expire
# in little time and preserve keys with a longer time to live.
# Redis will also try to remove objects from free lists if possible.
# When the memory limit is reached Redis will try to remove keys
# accordingly to the eviction policy selected (see maxmemmory-policy).
#
# If all this fails, Redis will start to reply with errors to commands
# that will use more memory, like SET, LPUSH, and so on, and will continue
# to reply to most read-only commands like GET.
# If Redis can't remove keys according to the policy, or if the policy is
# set to 'noeviction', Redis will start to reply with errors to commands
# that would use more memory, like SET, LPUSH, and so on, and will continue
# to reply to read-only commands like GET.
#
# WARNING: maxmemory can be a good idea mainly if you want to use Redis as a
# 'state' server or cache, not as a real DB. When Redis is used as a real
# database the memory usage will grow over the weeks, it will be obvious if
# it is going to use too much memory in the long run, and you'll have the time
# to upgrade. With maxmemory after the limit is reached you'll start to get
# errors for write operations, and this may even lead to DB inconsistency.
# This option is usually useful when using Redis as an LRU cache, or to set
# an hard memory limit for an instance (using the 'noeviction' policy).
#
# WARNING: If you have slaves attached to an instance with maxmemory on,
# the size of the output buffers needed to feed the slaves are subtracted
# from the used memory count, so that network problems / resyncs will
# not trigger a loop where keys are evicted, and in turn the output
# buffer of slaves is full with DELs of keys evicted triggering the deletion
# of more keys, and so forth until the database is completely emptied.
#
# In short... if you have slaves attached it is suggested that you set a lower
# limit for maxmemory so that there is some free RAM on the system for slave
# output buffers (but this is not needed if the policy is 'noeviction').
#
# maxmemory <bytes>

View File

@ -488,9 +488,10 @@ long long getExpire(redisDb *db, robj *key) {
void propagateExpire(redisDb *db, robj *key) {
robj *argv[2];
argv[0] = createStringObject("DEL",3);
argv[0] = shared.del;
argv[1] = key;
incrRefCount(key);
incrRefCount(argv[0]);
incrRefCount(argv[1]);
if (server.aof_state != REDIS_AOF_OFF)
feedAppendOnlyFile(server.delCommand,db->id,argv,2);

View File

@ -3,6 +3,14 @@
static void setProtocolError(redisClient *c, int pos);
/* To evaluate the output buffer size of a client we need to get size of
* allocated objects, however we can't used zmalloc_size() directly on sds
* strings because of the trick they use to work (the header is before the
* returned pointer), so we use this helper function. */
size_t zmalloc_size_sds(sds s) {
return zmalloc_size(s-sizeof(struct sdshdr));
}
void *dupClientReplyValue(void *o) {
incrRefCount((robj*)o);
return o;
@ -137,6 +145,7 @@ void _addReplyObjectToList(redisClient *c, robj *o) {
if (listLength(c->reply) == 0) {
incrRefCount(o);
listAddNodeTail(c->reply,o);
c->reply_bytes += zmalloc_size_sds(o->ptr);
} else {
tail = listNodeValue(listLast(c->reply));
@ -144,14 +153,16 @@ void _addReplyObjectToList(redisClient *c, robj *o) {
if (tail->ptr != NULL &&
sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES)
{
c->reply_bytes -= zmalloc_size_sds(tail->ptr);
tail = dupLastObjectIfNeeded(c->reply);
tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr));
c->reply_bytes += zmalloc_size_sds(tail->ptr);
} else {
incrRefCount(o);
listAddNodeTail(c->reply,o);
c->reply_bytes += zmalloc_size_sds(o->ptr);
}
}
c->reply_bytes += sdslen(o->ptr);
asyncCloseClientOnOutputBufferLimitReached(c);
}
@ -165,9 +176,9 @@ void _addReplySdsToList(redisClient *c, sds s) {
return;
}
c->reply_bytes += sdslen(s);
if (listLength(c->reply) == 0) {
listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
c->reply_bytes += zmalloc_size_sds(s);
} else {
tail = listNodeValue(listLast(c->reply));
@ -175,11 +186,14 @@ void _addReplySdsToList(redisClient *c, sds s) {
if (tail->ptr != NULL &&
sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES)
{
c->reply_bytes -= zmalloc_size_sds(tail->ptr);
tail = dupLastObjectIfNeeded(c->reply);
tail->ptr = sdscatlen(tail->ptr,s,sdslen(s));
c->reply_bytes += zmalloc_size_sds(tail->ptr);
sdsfree(s);
} else {
listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
c->reply_bytes += zmalloc_size_sds(s);
}
}
asyncCloseClientOnOutputBufferLimitReached(c);
@ -191,7 +205,10 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) {
if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
if (listLength(c->reply) == 0) {
listAddNodeTail(c->reply,createStringObject(s,len));
robj *o = createStringObject(s,len);
listAddNodeTail(c->reply,o);
c->reply_bytes += zmalloc_size_sds(o->ptr);
} else {
tail = listNodeValue(listLast(c->reply));
@ -199,13 +216,17 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) {
if (tail->ptr != NULL &&
sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES)
{
c->reply_bytes -= zmalloc_size_sds(tail->ptr);
tail = dupLastObjectIfNeeded(c->reply);
tail->ptr = sdscatlen(tail->ptr,s,len);
c->reply_bytes += zmalloc_size_sds(tail->ptr);
} else {
listAddNodeTail(c->reply,createStringObject(s,len));
robj *o = createStringObject(s,len);
listAddNodeTail(c->reply,o);
c->reply_bytes += zmalloc_size_sds(o->ptr);
}
}
c->reply_bytes += len;
asyncCloseClientOnOutputBufferLimitReached(c);
}
@ -336,7 +357,7 @@ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) {
len = listNodeValue(ln);
len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length);
c->reply_bytes += sdslen(len->ptr);
c->reply_bytes += zmalloc_size_sds(len->ptr);
if (ln->next != NULL) {
next = listNodeValue(ln->next);
@ -363,6 +384,18 @@ void addReplyDouble(redisClient *c, double d) {
void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) {
char buf[128];
int len;
/* Things like $3\r\n or *2\r\n are emitted very often by the protocol
* so we have a few shared objects to use if the integer is small
* like it is most of the times. */
if (prefix == '*' && ll < REDIS_SHARED_BULKHDR_LEN) {
addReply(c,shared.mbulkhdr[ll]);
return;
} else if (prefix == '$' && ll < REDIS_SHARED_BULKHDR_LEN) {
addReply(c,shared.bulkhdr[ll]);
return;
}
buf[0] = prefix;
len = ll2string(buf+1,sizeof(buf)-1,ll);
buf[len+1] = '\r';
@ -626,6 +659,7 @@ void freeClientsInAsyncFreeQueue(void) {
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
redisClient *c = privdata;
int nwritten = 0, totwritten = 0, objlen;
size_t objmem;
robj *o;
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
@ -651,6 +685,7 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
} else {
o = listNodeValue(listFirst(c->reply));
objlen = sdslen(o->ptr);
objmem = zmalloc_size_sds(o->ptr);
if (objlen == 0) {
listDelNode(c->reply,listFirst(c->reply));
@ -671,15 +706,20 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
if (c->sentlen == objlen) {
listDelNode(c->reply,listFirst(c->reply));
c->sentlen = 0;
c->reply_bytes -= objlen;
c->reply_bytes -= objmem;
}
}
/* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
/* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT
* bytes, in a single threaded server it's a good idea to serve
* other clients as well, even if a very large request comes from
* super fast link that is always able to accept data (in real world
* scenario think about 'KEYS *' against the loopback interfae) */
if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
* scenario think about 'KEYS *' against the loopback interface).
*
* However if we are over the maxmemory limit we ignore that and
* just deliver as much data as it is possible to deliver. */
if (totwritten > REDIS_MAX_WRITE_PER_EVENT &&
(server.maxmemory == 0 ||
zmalloc_used_memory() < server.maxmemory)) break;
}
if (nwritten == -1) {
if (errno == EAGAIN) {
@ -1205,7 +1245,7 @@ void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) {
* the caller wishes. The main usage of this function currently is
* enforcing the client output length limits. */
unsigned long getClientOutputBufferMemoryUsage(redisClient *c) {
unsigned long list_item_size = sizeof(listNode);
unsigned long list_item_size = sizeof(listNode)+sizeof(robj);
return c->reply_bytes + (list_item_size*listLength(c->reply));
}
@ -1298,3 +1338,24 @@ void asyncCloseClientOnOutputBufferLimitReached(redisClient *c) {
sdsfree(client);
}
}
/* Helper function used by freeMemoryIfNeeded() in order to flush slaves
* output buffers without returning control to the event loop. */
void flushSlavesOutputBuffers(void) {
listIter li;
listNode *ln;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
redisClient *slave = listNodeValue(ln);
int events;
events = aeGetFileEvents(server.el,slave->fd);
if (events & AE_WRITABLE &&
slave->replstate == REDIS_REPL_ONLINE &&
listLength(slave->reply))
{
sendReplyToClient(server.el,slave->fd,slave,0);
}
}
}

View File

@ -41,7 +41,7 @@ int pubsubSubscribeChannel(redisClient *c, robj *channel) {
listAddNodeTail(clients,c);
}
/* Notify the client */
addReply(c,shared.mbulk3);
addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.subscribebulk);
addReplyBulk(c,channel);
addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
@ -77,7 +77,7 @@ int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
}
/* Notify the client */
if (notify) {
addReply(c,shared.mbulk3);
addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.unsubscribebulk);
addReplyBulk(c,channel);
addReplyLongLong(c,dictSize(c->pubsub_channels)+
@ -103,7 +103,7 @@ int pubsubSubscribePattern(redisClient *c, robj *pattern) {
listAddNodeTail(server.pubsub_patterns,pat);
}
/* Notify the client */
addReply(c,shared.mbulk3);
addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.psubscribebulk);
addReplyBulk(c,pattern);
addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
@ -128,7 +128,7 @@ int pubsubUnsubscribePattern(redisClient *c, robj *pattern, int notify) {
}
/* Notify the client */
if (notify) {
addReply(c,shared.mbulk3);
addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.punsubscribebulk);
addReplyBulk(c,pattern);
addReplyLongLong(c,dictSize(c->pubsub_channels)+
@ -188,7 +188,7 @@ int pubsubPublishMessage(robj *channel, robj *message) {
while ((ln = listNext(&li)) != NULL) {
redisClient *c = ln->value;
addReply(c,shared.mbulk3);
addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.messagebulk);
addReplyBulk(c,channel);
addReplyBulk(c,message);
@ -206,7 +206,7 @@ int pubsubPublishMessage(robj *channel, robj *message) {
sdslen(pat->pattern->ptr),
(char*)channel->ptr,
sdslen(channel->ptr),0)) {
addReply(pat->client,shared.mbulk4);
addReply(pat->client,shared.mbulkhdr[4]);
addReply(pat->client,shared.pmessagebulk);
addReplyBulk(pat->client,pat->pattern);
addReplyBulk(pat->client,channel);

View File

@ -851,12 +851,17 @@ void createSharedObjects(void) {
shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
shared.mbulk3 = createStringObject("*3\r\n",4);
shared.mbulk4 = createStringObject("*4\r\n",4);
shared.del = createStringObject("DEL",3);
for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
shared.integers[j]->encoding = REDIS_ENCODING_INT;
}
for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) {
shared.mbulkhdr[j] = createObject(REDIS_STRING,
sdscatprintf(sdsempty(),"*%d\r\n",j));
shared.bulkhdr[j] = createObject(REDIS_STRING,
sdscatprintf(sdsempty(),"$%d\r\n",j));
}
}
void initServerConfig() {
@ -1270,12 +1275,13 @@ int processCommand(redisClient *c) {
* First we try to free some memory if possible (if there are volatile
* keys in the dataset). If there are not the only thing we can do
* is returning an error. */
if (server.maxmemory) freeMemoryIfNeeded();
if (server.maxmemory && (c->cmd->flags & REDIS_CMD_DENYOOM) &&
zmalloc_used_memory() > server.maxmemory)
{
addReplyError(c,"command not allowed when used memory > 'maxmemory'");
return REDIS_OK;
if (server.maxmemory) {
int retval = freeMemoryIfNeeded();
if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
addReplyError(c,
"command not allowed when used memory > 'maxmemory'");
return REDIS_OK;
}
}
/* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
@ -1778,23 +1784,57 @@ void monitorCommand(redisClient *c) {
/* ============================ Maxmemory directive ======================== */
/* This function gets called when 'maxmemory' is set on the config file to limit
* the max memory used by the server, and we are out of memory.
* This function will try to, in order:
* the max memory used by the server, before processing a command.
*
* - Free objects from the free list
* - Try to remove keys with an EXPIRE set
* The goal of the function is to free enough memory to keep Redis under the
* configured memory limit.
*
* It is not possible to free enough memory to reach used-memory < maxmemory
* the server will start refusing commands that will enlarge even more the
* memory usage.
* The function starts calculating how many bytes should be freed to keep
* Redis under the limit, and enters a loop selecting the best keys to
* evict accordingly to the configured policy.
*
* If all the bytes needed to return back under the limit were freed the
* function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller
* should block the execution of commands that will result in more memory
* used by the server.
*/
void freeMemoryIfNeeded(void) {
/* Remove keys accordingly to the active policy as long as we are
* over the memory limit. */
if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION) return;
int freeMemoryIfNeeded(void) {
size_t mem_used, mem_tofree, mem_freed;
int slaves = listLength(server.slaves);
while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
int j, k, freed = 0;
/* Remove the size of slaves output buffers and AOF buffer from the
* count of used memory. */
mem_used = zmalloc_used_memory();
if (slaves) {
listIter li;
listNode *ln;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
redisClient *slave = listNodeValue(ln);
unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
if (obuf_bytes > mem_used)
mem_used = 0;
else
mem_used -= obuf_bytes;
}
}
if (server.aof_state != REDIS_AOF_OFF) {
mem_used -= sdslen(server.aof_buf);
mem_used -= sdslen(server.aof_rewrite_buf);
}
/* Check if we are over the memory limit. */
if (mem_used <= server.maxmemory) return REDIS_OK;
if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
return REDIS_ERR; /* We need to free memory, but policy forbids. */
/* Compute how much memory we need to free. */
mem_tofree = mem_used - server.maxmemory;
mem_freed = 0;
while (mem_freed < mem_tofree) {
int j, k, keys_freed = 0;
for (j = 0; j < server.dbnum; j++) {
long bestval = 0; /* just to prevent warning */
@ -1867,16 +1907,36 @@ void freeMemoryIfNeeded(void) {
/* Finally remove the selected key. */
if (bestkey) {
long long delta;
robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
propagateExpire(db,keyobj);
/* We compute the amount of memory freed by dbDelete() alone.
* It is possible that actually the memory needed to propagate
* the DEL in AOF and replication link is greater than the one
* we are freeing removing the key, but we can't account for
* that otherwise we would never exit the loop.
*
* AOF and Output buffer memory will be freed eventually so
* we only care about memory used by the key space. */
delta = (long long) zmalloc_used_memory();
dbDelete(db,keyobj);
delta -= (long long) zmalloc_used_memory();
mem_freed += delta;
server.stat_evictedkeys++;
decrRefCount(keyobj);
freed++;
keys_freed++;
/* When the memory to free starts to be big enough, we may
* start spending so much time here that is impossible to
* deliver data to the slaves fast enough, so we force the
* transmission here inside the loop. */
if (slaves) flushSlavesOutputBuffers();
}
}
if (!freed) return; /* nothing to free... */
if (!keys_freed) return REDIS_ERR; /* nothing to free... */
}
return REDIS_OK;
}
/* =================================== Main! ================================ */

View File

@ -46,6 +46,7 @@
#define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
#define REDIS_SHARED_INTEGERS 10000
#define REDIS_SHARED_BULKHDR_LEN 32
#define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */
#define REDIS_AOF_REWRITE_PERC 100
#define REDIS_AOF_REWRITE_MIN_SIZE (1024*1024)
@ -358,9 +359,11 @@ struct sharedObjectsStruct {
*outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *plus,
*select0, *select1, *select2, *select3, *select4,
*select5, *select6, *select7, *select8, *select9,
*messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3,
*mbulk4, *psubscribebulk, *punsubscribebulk,
*integers[REDIS_SHARED_INTEGERS];
*messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk,
*psubscribebulk, *punsubscribebulk, *del,
*integers[REDIS_SHARED_INTEGERS],
*mbulkhdr[REDIS_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
*bulkhdr[REDIS_SHARED_BULKHDR_LEN]; /* "$<value>\r\n" */
};
/* ZSETs use a specialized version of Skiplists */
@ -817,6 +820,7 @@ void freeClientsInAsyncFreeQueue(void);
void asyncCloseClientOnOutputBufferLimitReached(redisClient *c);
int getClientLimitClassByName(char *name);
char *getClientLimitClassName(int class);
void flushSlavesOutputBuffers(void);
#ifdef __GNUC__
void addReplyErrorFormat(redisClient *c, const char *fmt, ...)
@ -940,7 +944,7 @@ unsigned int zsetLength(robj *zobj);
void zsetConvert(robj *zobj, int encoding);
/* Core functions */
void freeMemoryIfNeeded(void);
int freeMemoryIfNeeded(void);
int processCommand(redisClient *c);
void setupSignalHandlers(void);
struct redisCommand *lookupCommand(sds name);

View File

@ -150,6 +150,20 @@ void *zrealloc(void *ptr, size_t size) {
#endif
}
/* Provide zmalloc_size() for systems where this function is not provided by
* malloc itself, given that in that case we store an header with this
* information as the first bytes of every allocation. */
#ifndef HAVE_MALLOC_SIZE
size_t zmalloc_size(void *ptr) {
void *realptr = (char*)ptr-PREFIX_SIZE;
size_t size = *((size_t*)realptr);
/* Assume at least that all the allocations are padded at sizeof(long) by
* the underlying allocator. */
if (size&(sizeof(long)-1)) size += sizeof(long)-(size&(sizeof(long)-1));
return size+PREFIX_SIZE;
}
#endif
void zfree(void *ptr) {
#ifndef HAVE_MALLOC_SIZE
void *realptr;

View File

@ -76,4 +76,8 @@ void zmalloc_enable_thread_safeness(void);
float zmalloc_get_fragmentation_ratio(void);
size_t zmalloc_get_rss(void);
#ifndef HAVE_MALLOC_SIZE
size_t zmalloc_size(void *ptr);
#endif
#endif /* __ZMALLOC_H */