From 144b0094c31b0a3604e5bd2686a7df15c0fd324d Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 3 Jun 2010 16:06:18 +0200 Subject: [PATCH 001/139] initial implementation for the intset --- intset.c | 379 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ intset.h | 16 +++ 2 files changed, 395 insertions(+) create mode 100644 intset.c create mode 100644 intset.h diff --git a/intset.c b/intset.c new file mode 100644 index 00000000..8ffaef25 --- /dev/null +++ b/intset.c @@ -0,0 +1,379 @@ +#include +#include +#include +#include +#include "intset.h" +#include "zmalloc.h" + +/* Note that these encodings are ordered, so: + * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */ +#define INTSET_ENC_INT16 (sizeof(int16_t)) +#define INTSET_ENC_INT32 (sizeof(int32_t)) +#define INTSET_ENC_INT64 (sizeof(int64_t)) + +/* Accessors for each type of encoding */ +#define INTSET_VALUE_ENCODING(__val) (((__val) < INT32_MIN || (__val) > INT32_MAX) ? \ + INTSET_ENC_INT64 : (((__val) < INT16_MIN || (__val) > INT16_MAX) ? \ + INTSET_ENC_INT32 : INTSET_ENC_INT16)) +#define INTSET_GET_ENCODED(__is,__pos,__enc) ((__enc == INTSET_ENC_INT64) ? \ + ((int64_t*)(__is)->contents)[__pos] : ((__enc == INTSET_ENC_INT32) ? \ + ((int32_t*)(__is)->contents)[__pos] : ((int16_t*)(__is)->contents)[__pos])) +#define INTSET_GET(__is,__pos) (INTSET_GET_ENCODED(__is,__pos,(__is)->encoding)) +#define INTSET_SET(__is,__pos,__val) { \ + if ((__is)->encoding == INTSET_ENC_INT64) \ + ((int64_t*)(__is)->contents)[__pos] = (__val); \ + else if ((__is)->encoding == INTSET_ENC_INT32) \ + ((int32_t*)(__is)->contents)[__pos] = (__val); \ + else \ + ((int16_t*)(__is)->contents)[__pos] = (__val); } + +/* Create an empty intset. 
*/ +intset *intsetNew(void) { + intset *is = zmalloc(sizeof(intset)); + is->encoding = INTSET_ENC_INT16; + is->length = 0; + return is; +} + +/* Resize the intset */ +static intset *intsetResize(intset *is, uint32_t len) { + uint32_t size = len*is->encoding; + is = zrealloc(is,sizeof(intset)+size); + return is; +} + +static intset *intsetUpgrade(intset *is, uint8_t newenc, uint8_t extra, uint8_t offset) { + uint8_t curenc = is->encoding; + int length = is->length; + + /* First set new encoding and resize */ + is->encoding = newenc; + is = intsetResize(is,is->length+extra); + + /* Upgrade back-to-front so we don't overwrite values */ + while(length--) + INTSET_SET(is,length+offset,INTSET_GET_ENCODED(is,length,curenc)); + return is; +} + +/* Search for the position of "value". Return 1 when the value was found and + * sets "pos" to the position of the value within the intset. Return 0 when + * the value is not present in the intset and sets "pos" to the position + * where "value" can be inserted. 
*/ +static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) { + int min = 0, max = is->length-1, mid; + int64_t cur; + + /* The value can never be found when the set is empty */ + if (is->length == 0) { + if (pos) *pos = 0; + return 0; + } + + while(max >= min) { + mid = (min+max)/2; + cur = INTSET_GET(is,mid); + if (value > cur) { + min = mid+1; + } else if (value < cur) { + max = mid-1; + } else { + break; + } + } + + if (value == cur) { + if (pos) *pos = mid; + return 1; + } else { + if (pos) *pos = min; + return 0; + } +} + +static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) { + void *src, *dst; + uint32_t bytes = is->length-from; + if (is->encoding == INTSET_ENC_INT64) { + src = (int64_t*)is->contents+from; + dst = (int64_t*)is->contents+to; + bytes *= sizeof(int64_t); + } else if (is->encoding == INTSET_ENC_INT32) { + src = (int32_t*)is->contents+from; + dst = (int32_t*)is->contents+to; + bytes *= sizeof(int32_t); + } else { + src = (int16_t*)is->contents+from; + dst = (int16_t*)is->contents+to; + bytes *= sizeof(int16_t); + } + memmove(dst,src,bytes); +} + +/* Insert an integer in the intset */ +intset *intsetAdd(intset *is, int64_t value, uint8_t *success) { + uint8_t valenc = INTSET_VALUE_ENCODING(value); + uint32_t pos, offset; + if (success) *success = 1; + + /* Upgrade encoding if necessary. If we need to upgrade, we know that + * this value should be either appended (if > 0) or prepended (if < 0), + * because it lies outside the range of existing values. */ + if (valenc > is->encoding) { + offset = value < 0 ? 1 : 0; + is = intsetUpgrade(is,valenc,1,offset); + pos = (value < 0) ? 
0 : is->length; + } else { + if (is->length == 0) { + pos = 0; + } else { + /* Check for the case where we know the insert position */ + if (value > INTSET_GET(is,is->length-1)) { + pos = is->length; + } else if (value < INTSET_GET(is,0)) { + pos = 0; + } else { + /* Abort if the value is already present in the set */ + if (intsetSearch(is,value,&pos)) { + if (success) *success = 0; + return is; + } + } + } + + is = intsetResize(is,is->length+1); + if (pos < is->length) intsetMoveTail(is,pos,pos+1); + } + + INTSET_SET(is,pos,value); + is->length++; + return is; +} + +/* Delete integer from intset */ +intset *intsetDelete(intset *is, int64_t value, uint8_t *success) { + uint8_t valenc = INTSET_VALUE_ENCODING(value); + uint32_t pos; + if (success) *success = 0; + + if (valenc <= is->encoding && intsetSearch(is,value,&pos)) { + /* We know we can delete */ + if (success) *success = 1; + + /* Overwrite value with tail and update length */ + if (pos < (is->length-1)) intsetMoveTail(is,pos+1,pos); + is = intsetResize(is,is->length-1); + is->length--; + } + return is; +} + +/* Determine whether a value belongs to this set */ +uint8_t intsetFind(intset *is, int64_t value) { + uint8_t valenc = INTSET_VALUE_ENCODING(value); + return valenc <= is->encoding && intsetSearch(is,value,NULL); +} + +/* Return random member */ +int64_t intsetRandom(intset *is) { + return INTSET_GET(is,rand()%is->length); +} + +#ifdef INTSET_TEST_MAIN +#include + +void intsetRepr(intset *is) { + int i; + for (i = 0; i < is->length; i++) { + printf("%lld\n", (uint64_t)INTSET_GET(is,i)); + } + printf("\n"); +} + +void error(char *err) { + printf("%s\n", err); + exit(1); +} + +void ok(void) { + printf("OK\n"); +} + +long long usec(void) { + struct timeval tv; + gettimeofday(&tv,NULL); + return (((long long)tv.tv_sec)*1000000)+tv.tv_usec; +} + +#define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1))) +void _assert(char *estr, char *file, int line) { + printf("\n\n=== ASSERTION FAILED 
===\n"); + printf("==> %s:%d '%s' is not true\n",file,line,estr); +} + +intset *createSet(int bits, int size) { + uint64_t mask = (1< 32) { + value = (rand()*rand()) & mask; + } else { + value = rand() & mask; + } + is = intsetAdd(is,value,NULL); + } + return is; +} + +void checkConsistency(intset *is) { + int i; + + for (i = 0; i < (is->length-1); i++) { + if (is->encoding == INTSET_ENC_INT16) { + int16_t *i16 = (int16_t*)is->contents; + assert(i16[i] < i16[i+1]); + } else if (is->encoding == INTSET_ENC_INT32) { + int32_t *i32 = (int32_t*)is->contents; + assert(i32[i] < i32[i+1]); + } else { + int64_t *i64 = (int64_t*)is->contents; + assert(i64[i] < i64[i+1]); + } + } +} + +int main(int argc, char **argv) { + uint8_t success; + int i; + intset *is; + sranddev(); + + printf("Value encodings: "); { + assert(INTSET_VALUE_ENCODING(-32768) == INTSET_ENC_INT16); + assert(INTSET_VALUE_ENCODING(+32767) == INTSET_ENC_INT16); + assert(INTSET_VALUE_ENCODING(-32769) == INTSET_ENC_INT32); + assert(INTSET_VALUE_ENCODING(+32768) == INTSET_ENC_INT32); + assert(INTSET_VALUE_ENCODING(-2147483648) == INTSET_ENC_INT32); + assert(INTSET_VALUE_ENCODING(+2147483647) == INTSET_ENC_INT32); + assert(INTSET_VALUE_ENCODING(-2147483649) == INTSET_ENC_INT64); + assert(INTSET_VALUE_ENCODING(+2147483648) == INTSET_ENC_INT64); + assert(INTSET_VALUE_ENCODING(-9223372036854775808ull) == INTSET_ENC_INT64); + assert(INTSET_VALUE_ENCODING(+9223372036854775807ull) == INTSET_ENC_INT64); + ok(); + } + + printf("Basic adding: "); { + is = intsetNew(); + is = intsetAdd(is,5,&success); assert(success); + is = intsetAdd(is,6,&success); assert(success); + is = intsetAdd(is,4,&success); assert(success); + is = intsetAdd(is,4,&success); assert(!success); + ok(); + } + + printf("Large number of random adds: "); { + int inserts = 0; + is = intsetNew(); + for (i = 0; i < 1024; i++) { + is = intsetAdd(is,rand()%0x800,&success); + if (success) inserts++; + } + assert(is->length == inserts); + checkConsistency(is); + 
ok(); + } + + printf("Upgrade from int16 to int32: "); { + is = intsetNew(); + is = intsetAdd(is,32,NULL); + assert(is->encoding == INTSET_ENC_INT16); + is = intsetAdd(is,65535,NULL); + assert(is->encoding == INTSET_ENC_INT32); + assert(intsetFind(is,32)); + assert(intsetFind(is,65535)); + checkConsistency(is); + + is = intsetNew(); + is = intsetAdd(is,32,NULL); + assert(is->encoding == INTSET_ENC_INT16); + is = intsetAdd(is,-65535,NULL); + assert(is->encoding == INTSET_ENC_INT32); + assert(intsetFind(is,32)); + assert(intsetFind(is,-65535)); + checkConsistency(is); + ok(); + } + + printf("Upgrade from int16 to int64: "); { + is = intsetNew(); + is = intsetAdd(is,32,NULL); + assert(is->encoding == INTSET_ENC_INT16); + is = intsetAdd(is,4294967295,NULL); + assert(is->encoding == INTSET_ENC_INT64); + assert(intsetFind(is,32)); + assert(intsetFind(is,4294967295)); + checkConsistency(is); + + is = intsetNew(); + is = intsetAdd(is,32,NULL); + assert(is->encoding == INTSET_ENC_INT16); + is = intsetAdd(is,-4294967295,NULL); + assert(is->encoding == INTSET_ENC_INT64); + assert(intsetFind(is,32)); + assert(intsetFind(is,-4294967295)); + checkConsistency(is); + ok(); + } + + printf("Upgrade from int32 to int64: "); { + is = intsetNew(); + is = intsetAdd(is,65535,NULL); + assert(is->encoding == INTSET_ENC_INT32); + is = intsetAdd(is,4294967295,NULL); + assert(is->encoding == INTSET_ENC_INT64); + assert(intsetFind(is,65535)); + assert(intsetFind(is,4294967295)); + checkConsistency(is); + + is = intsetNew(); + is = intsetAdd(is,65535,NULL); + assert(is->encoding == INTSET_ENC_INT32); + is = intsetAdd(is,-4294967295,NULL); + assert(is->encoding == INTSET_ENC_INT64); + assert(intsetFind(is,65535)); + assert(intsetFind(is,-4294967295)); + checkConsistency(is); + ok(); + } + + printf("Stress lookups: "); { + long num = 100000, size = 10000; + int i, bits = 20; + long long start; + is = createSet(bits,size); + checkConsistency(is); + + start = usec(); + for (i = 0; i < num; i++) 
intsetSearch(is,rand() % ((1< Date: Thu, 3 Jun 2010 22:21:39 +0200 Subject: [PATCH 002/139] move logic for obvious corner cases to intsetSearch --- intset.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/intset.c b/intset.c index 8ffaef25..f1bbcde3 100644 --- a/intset.c +++ b/intset.c @@ -68,6 +68,16 @@ static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) { if (is->length == 0) { if (pos) *pos = 0; return 0; + } else { + /* Check for the case where we know we cannot find the value, + * but do know the insert position. */ + if (value > INTSET_GET(is,is->length-1)) { + if (pos) *pos = is->length; + return 0; + } else if (value < INTSET_GET(is,0)) { + if (pos) *pos = 0; + return 0; + } } while(max >= min) { @@ -124,21 +134,12 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success) { is = intsetUpgrade(is,valenc,1,offset); pos = (value < 0) ? 0 : is->length; } else { - if (is->length == 0) { - pos = 0; - } else { - /* Check for the case where we know the insert position */ - if (value > INTSET_GET(is,is->length-1)) { - pos = is->length; - } else if (value < INTSET_GET(is,0)) { - pos = 0; - } else { - /* Abort if the value is already present in the set */ - if (intsetSearch(is,value,&pos)) { - if (success) *success = 0; - return is; - } - } + /* Abort if the value is already present in the set. + * This call will populate "pos" with the right position to insert + * the value when it cannot be found. 
*/ + if (intsetSearch(is,value,&pos)) { + if (success) *success = 0; + return is; } is = intsetResize(is,is->length+1); From 35cabcb50557eae1433a68c7730ad8aff478b68a Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 11 Jun 2010 18:35:57 +0200 Subject: [PATCH 003/139] wrapper functions for the set type to support multiple encodings --- redis.c | 282 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 174 insertions(+), 108 deletions(-) diff --git a/redis.c b/redis.c index 3e7a17d8..cb7bca8e 100644 --- a/redis.c +++ b/redis.c @@ -3064,7 +3064,9 @@ static robj *createZiplistObject(void) { static robj *createSetObject(void) { dict *d = dictCreate(&setDictType,NULL); - return createObject(REDIS_SET,d); + robj *o = createObject(REDIS_SET,d); + o->encoding = REDIS_ENCODING_HT; + return o; } static robj *createHashObject(void) { @@ -5457,6 +5459,99 @@ static void rpoplpushcommand(redisClient *c) { /* ==================================== Sets ================================ */ +static int setTypeAdd(robj *subject, robj *value) { + if (subject->encoding == REDIS_ENCODING_HT) { + if (dictAdd(subject->ptr,value,NULL) == DICT_OK) { + incrRefCount(value); + return 1; + } + } else { + redisPanic("Unknown set encoding"); + } + return 0; +} + +static int setTypeRemove(robj *subject, robj *value) { + if (subject->encoding == REDIS_ENCODING_HT) { + if (dictDelete(subject->ptr,value) == DICT_OK) { + if (htNeedsResize(subject->ptr)) dictResize(subject->ptr); + return 1; + } + } else { + redisPanic("Unknown set encoding"); + } + return 0; +} + +static int setTypeIsMember(robj *subject, robj *value) { + if (subject->encoding == REDIS_ENCODING_HT) { + return dictFind((dict*)subject->ptr,value) != NULL; + } else { + redisPanic("Unknown set encoding"); + } +} + +/* Structure to hold set iteration abstraction. 
*/ +typedef struct { + int encoding; + dictIterator *di; +} setIterator; + +static setIterator *setTypeInitIterator(robj *subject) { + setIterator *si = zmalloc(sizeof(setIterator)); + si->encoding = subject->encoding; + if (si->encoding == REDIS_ENCODING_HT) { + si->di = dictGetIterator(subject->ptr); + } else { + redisPanic("Unknown set encoding"); + } + return si; +} + +static void setTypeReleaseIterator(setIterator *si) { + if (si->encoding == REDIS_ENCODING_HT) + dictReleaseIterator(si->di); + zfree(si); +} + +/* Move to the next entry in the set. Returns the object at the current + * position, or NULL when the end is reached. This object will have its + * refcount incremented, so the caller needs to take care of this. */ +static robj *setTypeNext(setIterator *si) { + robj *ret = NULL; + if (si->encoding == REDIS_ENCODING_HT) { + dictEntry *de = dictNext(si->di); + if (de != NULL) { + ret = dictGetEntryKey(de); + incrRefCount(ret); + } + } + return ret; +} + + +/* Return random element from set. The returned object will always have + * an incremented refcount. 
*/ +robj *setTypeRandomElement(robj *subject) { + robj *ret = NULL; + if (subject->encoding == REDIS_ENCODING_HT) { + dictEntry *de = dictGetRandomKey(subject->ptr); + ret = dictGetEntryKey(de); + incrRefCount(ret); + } else { + redisPanic("Unknown set encoding"); + } + return ret; +} + +static unsigned long setTypeSize(robj *subject) { + if (subject->encoding == REDIS_ENCODING_HT) { + return dictSize((dict*)subject->ptr); + } else { + redisPanic("Unknown set encoding"); + } +} + static void saddCommand(redisClient *c) { robj *set; @@ -5470,8 +5565,7 @@ static void saddCommand(redisClient *c) { return; } } - if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) { - incrRefCount(c->argv[2]); + if (setTypeAdd(set,c->argv[2])) { server.dirty++; addReply(c,shared.cone); } else { @@ -5485,10 +5579,9 @@ static void sremCommand(redisClient *c) { if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL || checkType(c,set,REDIS_SET)) return; - if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) { + if (setTypeRemove(set,c->argv[2])) { + if (setTypeSize(set) == 0) dbDelete(c->db,c->argv[1]); server.dirty++; - if (htNeedsResize(set->ptr)) dictResize(set->ptr); - if (dictSize((dict*)set->ptr) == 0) dbDelete(c->db,c->argv[1]); addReply(c,shared.cone); } else { addReply(c,shared.czero); @@ -5513,12 +5606,12 @@ static void smoveCommand(redisClient *c) { return; } /* Remove the element from the source set */ - if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) { + if (!setTypeRemove(srcset,c->argv[3])) { /* Key not found in the src set! 
return zero */ addReply(c,shared.czero); return; } - if (dictSize((dict*)srcset->ptr) == 0 && srcset != dstset) + if (setTypeSize(srcset) == 0 && srcset != dstset) dbDelete(c->db,c->argv[1]); server.dirty++; /* Add the element to the destination set */ @@ -5526,8 +5619,7 @@ static void smoveCommand(redisClient *c) { dstset = createSetObject(); dbAdd(c->db,c->argv[2],dstset); } - if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK) - incrRefCount(c->argv[3]); + setTypeAdd(dstset,c->argv[3]); addReply(c,shared.cone); } @@ -5537,7 +5629,7 @@ static void sismemberCommand(redisClient *c) { if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL || checkType(c,set,REDIS_SET)) return; - if (dictFind(set->ptr,c->argv[2])) + if (setTypeIsMember(set,c->argv[2])) addReply(c,shared.cone); else addReply(c,shared.czero); @@ -5545,74 +5637,62 @@ static void sismemberCommand(redisClient *c) { static void scardCommand(redisClient *c) { robj *o; - dict *s; if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL || checkType(c,o,REDIS_SET)) return; - s = o->ptr; - addReplyUlong(c,dictSize(s)); + addReplyUlong(c,setTypeSize(o)); } static void spopCommand(redisClient *c) { - robj *set; - dictEntry *de; + robj *set, *ele; if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL || checkType(c,set,REDIS_SET)) return; - de = dictGetRandomKey(set->ptr); - if (de == NULL) { + ele = setTypeRandomElement(set); + if (ele == NULL) { addReply(c,shared.nullbulk); } else { - robj *ele = dictGetEntryKey(de); - + setTypeRemove(set,ele); addReplyBulk(c,ele); - dictDelete(set->ptr,ele); - if (htNeedsResize(set->ptr)) dictResize(set->ptr); - if (dictSize((dict*)set->ptr) == 0) dbDelete(c->db,c->argv[1]); + decrRefCount(ele); + if (setTypeSize(set) == 0) dbDelete(c->db,c->argv[1]); server.dirty++; } } static void srandmemberCommand(redisClient *c) { - robj *set; - dictEntry *de; + robj *set, *ele; if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL 
|| checkType(c,set,REDIS_SET)) return; - de = dictGetRandomKey(set->ptr); - if (de == NULL) { + ele = setTypeRandomElement(set); + if (ele == NULL) { addReply(c,shared.nullbulk); } else { - robj *ele = dictGetEntryKey(de); - addReplyBulk(c,ele); + decrRefCount(ele); } } static int qsortCompareSetsByCardinality(const void *s1, const void *s2) { - dict **d1 = (void*) s1, **d2 = (void*) s2; - - return dictSize(*d1)-dictSize(*d2); + return setTypeSize(*(robj**)s1)-setTypeSize(*(robj**)s2); } -static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long setsnum, robj *dstkey) { - dict **dv = zmalloc(sizeof(dict*)*setsnum); - dictIterator *di; - dictEntry *de; - robj *lenobj = NULL, *dstset = NULL; +static void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum, robj *dstkey) { + robj **sets = zmalloc(sizeof(robj*)*setnum); + setIterator *si; + robj *ele, *lenobj = NULL, *dstset = NULL; unsigned long j, cardinality = 0; - for (j = 0; j < setsnum; j++) { - robj *setobj; - - setobj = dstkey ? - lookupKeyWrite(c->db,setskeys[j]) : - lookupKeyRead(c->db,setskeys[j]); + for (j = 0; j < setnum; j++) { + robj *setobj = dstkey ? + lookupKeyWrite(c->db,setkeys[j]) : + lookupKeyRead(c->db,setkeys[j]); if (!setobj) { - zfree(dv); + zfree(sets); if (dstkey) { if (dbDelete(c->db,dstkey)) server.dirty++; @@ -5622,16 +5702,15 @@ static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long } return; } - if (setobj->type != REDIS_SET) { - zfree(dv); - addReply(c,shared.wrongtypeerr); + if (checkType(c,setobj,REDIS_SET)) { + zfree(sets); return; } - dv[j] = setobj->ptr; + sets[j] = setobj; } /* Sort sets from the smallest to largest, this will improve our * algorithm's performace */ - qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality); + qsort(sets,setnum,sizeof(robj*),qsortCompareSetsByCardinality); /* The first thing we should output is the total number of elements... 
* since this is a multi-bulk write, but at this stage we don't know @@ -5651,33 +5730,31 @@ static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long /* Iterate all the elements of the first (smallest) set, and test * the element against all the other sets, if at least one set does * not include the element it is discarded */ - di = dictGetIterator(dv[0]); + si = setTypeInitIterator(sets[0]); + while((ele = setTypeNext(si)) != NULL) { + for (j = 1; j < setnum; j++) + if (!setTypeIsMember(sets[j],ele)) break; - while((de = dictNext(di)) != NULL) { - robj *ele; - - for (j = 1; j < setsnum; j++) - if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break; - if (j != setsnum) - continue; /* at least one set does not contain the member */ - ele = dictGetEntryKey(de); - if (!dstkey) { - addReplyBulk(c,ele); - cardinality++; - } else { - dictAdd(dstset->ptr,ele,NULL); - incrRefCount(ele); + /* Only take action when all sets contain the member */ + if (j == setnum) { + if (!dstkey) { + addReplyBulk(c,ele); + cardinality++; + } else { + setTypeAdd(dstset,ele); + } } + decrRefCount(ele); } - dictReleaseIterator(di); + setTypeReleaseIterator(si); if (dstkey) { /* Store the resulting set into the target, if the intersection * is not an empty set. 
*/ dbDelete(c->db,dstkey); - if (dictSize((dict*)dstset->ptr) > 0) { + if (setTypeSize(dstset) > 0) { dbAdd(c->db,dstkey,dstset); - addReplyLongLong(c,dictSize((dict*)dstset->ptr)); + addReplyLongLong(c,setTypeSize(dstset)); } else { decrRefCount(dstset); addReply(c,shared.czero); @@ -5686,7 +5763,7 @@ static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long } else { lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",cardinality); } - zfree(dv); + zfree(sets); } static void sinterCommand(redisClient *c) { @@ -5701,29 +5778,25 @@ static void sinterstoreCommand(redisClient *c) { #define REDIS_OP_DIFF 1 #define REDIS_OP_INTER 2 -static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) { - dict **dv = zmalloc(sizeof(dict*)*setsnum); - dictIterator *di; - dictEntry *de; - robj *dstset = NULL; +static void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op) { + robj **sets = zmalloc(sizeof(robj*)*setnum); + setIterator *si; + robj *ele, *dstset = NULL; int j, cardinality = 0; - for (j = 0; j < setsnum; j++) { - robj *setobj; - - setobj = dstkey ? - lookupKeyWrite(c->db,setskeys[j]) : - lookupKeyRead(c->db,setskeys[j]); + for (j = 0; j < setnum; j++) { + robj *setobj = dstkey ? + lookupKeyWrite(c->db,setkeys[j]) : + lookupKeyRead(c->db,setkeys[j]); if (!setobj) { - dv[j] = NULL; + sets[j] = NULL; continue; } - if (setobj->type != REDIS_SET) { - zfree(dv); - addReply(c,shared.wrongtypeerr); + if (checkType(c,setobj,REDIS_SET)) { + zfree(sets); return; } - dv[j] = setobj->ptr; + sets[j] = setobj; } /* We need a temp set object to store our union. 
If the dstkey @@ -5733,60 +5806,53 @@ static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnu /* Iterate all the elements of all the sets, add every element a single * time to the result set */ - for (j = 0; j < setsnum; j++) { - if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */ - if (!dv[j]) continue; /* non existing keys are like empty sets */ + for (j = 0; j < setnum; j++) { + if (op == REDIS_OP_DIFF && j == 0 && !sets[j]) break; /* result set is empty */ + if (!sets[j]) continue; /* non existing keys are like empty sets */ - di = dictGetIterator(dv[j]); - - while((de = dictNext(di)) != NULL) { - robj *ele; - - /* dictAdd will not add the same element multiple times */ - ele = dictGetEntryKey(de); + si = setTypeInitIterator(sets[j]); + while((ele = setTypeNext(si)) != NULL) { if (op == REDIS_OP_UNION || j == 0) { - if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) { - incrRefCount(ele); + if (setTypeAdd(dstset,ele)) { cardinality++; } } else if (op == REDIS_OP_DIFF) { - if (dictDelete(dstset->ptr,ele) == DICT_OK) { + if (setTypeRemove(dstset,ele)) { cardinality--; } } + decrRefCount(ele); } - dictReleaseIterator(di); + setTypeReleaseIterator(si); - /* result set is empty? Exit asap. */ + /* Exit when result set is empty. 
*/ if (op == REDIS_OP_DIFF && cardinality == 0) break; } /* Output the content of the resulting set, if not in STORE mode */ if (!dstkey) { addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality)); - di = dictGetIterator(dstset->ptr); - while((de = dictNext(di)) != NULL) { - robj *ele; - - ele = dictGetEntryKey(de); + si = setTypeInitIterator(dstset); + while((ele = setTypeNext(si)) != NULL) { addReplyBulk(c,ele); + decrRefCount(ele); } - dictReleaseIterator(di); + setTypeReleaseIterator(si); decrRefCount(dstset); } else { /* If we have a target key where to store the resulting set * create this key with the result set inside */ dbDelete(c->db,dstkey); - if (dictSize((dict*)dstset->ptr) > 0) { + if (setTypeSize(dstset) > 0) { dbAdd(c->db,dstkey,dstset); - addReplyLongLong(c,dictSize((dict*)dstset->ptr)); + addReplyLongLong(c,setTypeSize(dstset)); } else { decrRefCount(dstset); addReply(c,shared.czero); } server.dirty++; } - zfree(dv); + zfree(sets); } static void sunionCommand(redisClient *c) { From e24d93762f8945de0ee03f23b15d686cfa08bb58 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 11 Jun 2010 19:22:27 +0200 Subject: [PATCH 004/139] intset housekeeping --- intset.c | 5 ++--- intset.h | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/intset.c b/intset.c index f1bbcde3..1ee8263c 100644 --- a/intset.c +++ b/intset.c @@ -1,7 +1,6 @@ #include #include #include -#include #include "intset.h" #include "zmalloc.h" @@ -152,7 +151,7 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success) { } /* Delete integer from intset */ -intset *intsetDelete(intset *is, int64_t value, uint8_t *success) { +intset *intsetRemove(intset *is, int64_t value, uint8_t *success) { uint8_t valenc = INTSET_VALUE_ENCODING(value); uint32_t pos; if (success) *success = 0; @@ -370,7 +369,7 @@ int main(int argc, char **argv) { assert(intsetFind(is,v1)); v2 = rand() % 0xfff; - is = intsetDelete(is,v2,NULL); + is = intsetRemove(is,v2,NULL); 
assert(!intsetFind(is,v2)); } checkConsistency(is); diff --git a/intset.h b/intset.h index 9d91710a..b90c44e0 100644 --- a/intset.h +++ b/intset.h @@ -1,5 +1,6 @@ #ifndef __INTSET_H #define __INTSET_H +#include typedef struct intset { uint32_t encoding; @@ -9,7 +10,7 @@ typedef struct intset { intset *intsetNew(void); intset *intsetAdd(intset *is, int64_t value, uint8_t *success); -intset *intsetDelete(intset *is, int64_t value, uint8_t *success); +intset *intsetRemove(intset *is, int64_t value, uint8_t *success); uint8_t intsetFind(intset *is, int64_t value); int64_t intsetRandom(intset *is); From d0b58d530027185a7fccd08bcda31efe06ef366b Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sat, 12 Jun 2010 22:25:22 +0200 Subject: [PATCH 005/139] intset encoding for sets, refactored set tests to test both encodings --- Makefile | 3 +- intset.c | 19 ++- intset.h | 2 + redis.c | 135 +++++++++++++++++--- tests/unit/type/set.tcl | 264 +++++++++++++++++++++++++--------------- 5 files changed, 306 insertions(+), 117 deletions(-) diff --git a/Makefile b/Makefile index 46df88bb..72524f7c 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ endif CCOPT= $(CFLAGS) $(CCLINK) $(ARCH) $(PROF) DEBUG?= -g -rdynamic -ggdb -OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o +OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o intset.o BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o linenoise.o CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o @@ -54,6 +54,7 @@ redis.o: redis.c fmacros.h config.h redis.h ae.h sds.h anet.h dict.h \ sds.o: sds.c sds.h zmalloc.h zipmap.o: zipmap.c zmalloc.h ziplist.o: ziplist.c zmalloc.h +intset.o: intset.c zmalloc.h zmalloc.o: zmalloc.c config.h redis-server: $(OBJ) diff --git a/intset.c b/intset.c index 1ee8263c..2532582e 100644 --- a/intset.c +++ b/intset.c @@ 
-60,8 +60,8 @@ static intset *intsetUpgrade(intset *is, uint8_t newenc, uint8_t extra, uint8_t * the value is not present in the intset and sets "pos" to the position * where "value" can be inserted. */ static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) { - int min = 0, max = is->length-1, mid; - int64_t cur; + int min = 0, max = is->length-1, mid = -1; + int64_t cur = -1; /* The value can never be found when the set is empty */ if (is->length == 0) { @@ -179,6 +179,21 @@ int64_t intsetRandom(intset *is) { return INTSET_GET(is,rand()%is->length); } +/* Sets the value to the value at the given position. When this position is + * out of range the function returns 0, when in range it returns 1. */ +uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) { + if (pos < is->length) { + *value = INTSET_GET(is,pos); + return 1; + } + return 0; +} + +/* Return intset length */ +uint32_t intsetLen(intset *is) { + return is->length; +} + #ifdef INTSET_TEST_MAIN #include diff --git a/intset.h b/intset.h index b90c44e0..25afc18d 100644 --- a/intset.h +++ b/intset.h @@ -13,5 +13,7 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success); intset *intsetRemove(intset *is, int64_t value, uint8_t *success); uint8_t intsetFind(intset *is, int64_t value); int64_t intsetRandom(intset *is); +uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value); +uint32_t intsetLen(intset *is); #endif // __INTSET_H diff --git a/redis.c b/redis.c index cb7bca8e..c1df3293 100644 --- a/redis.c +++ b/redis.c @@ -76,6 +76,7 @@ #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ #include "zipmap.h" /* Compact dictionary-alike data structure */ #include "ziplist.h" /* Compact list data structure */ +#include "intset.h" /* Compact integer set structure */ #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */ #include "release.h" /* Release and/or git repository information */ @@ -132,9 +133,10 @@ #define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */ #define 
REDIS_ENCODING_LIST 4 /* Encoded as zipmap */ #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ +#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */ static char* strencoding[] = { - "raw", "int", "hashtable", "zipmap", "list", "ziplist" + "raw", "int", "hashtable", "zipmap", "list", "ziplist", "intset" }; /* Object types only used for dumping to disk */ @@ -651,6 +653,7 @@ static void call(redisClient *c, struct redisCommand *cmd); static void resetClient(redisClient *c); static void convertToRealHash(robj *o); static void listTypeConvert(robj *o, int enc); +static void setTypeConvert(robj *o, int enc); static int pubsubUnsubscribeAllChannels(redisClient *c, int notify); static int pubsubUnsubscribeAllPatterns(redisClient *c, int notify); static void freePubsubPattern(void *p); @@ -3069,6 +3072,13 @@ static robj *createSetObject(void) { return o; } +static robj *createIntsetObject(void) { + intset *is = intsetNew(); + robj *o = createObject(REDIS_SET,is); + o->encoding = REDIS_ENCODING_INTSET; + return o; +} + static robj *createHashObject(void) { /* All the Hashes start as zipmaps. 
Will be automatically converted * into hash tables if there are enough elements or big elements @@ -3107,7 +3117,16 @@ static void freeListObject(robj *o) { } static void freeSetObject(robj *o) { - dictRelease((dict*) o->ptr); + switch (o->encoding) { + case REDIS_ENCODING_HT: + dictRelease((dict*) o->ptr); + break; + case REDIS_ENCODING_INTSET: + zfree(o->ptr); + break; + default: + redisPanic("Unknown set encoding type"); + } } static void freeZsetObject(robj *o) { @@ -3371,7 +3390,7 @@ static int getLongLongFromObject(robj *o, long long *target) { } } - *target = value; + if (target) *target = value; return REDIS_OK; } @@ -3789,17 +3808,29 @@ static int rdbSaveObject(FILE *fp, robj *o) { } } else if (o->type == REDIS_SET) { /* Save a set value */ - dict *set = o->ptr; - dictIterator *di = dictGetIterator(set); - dictEntry *de; + if (o->encoding == REDIS_ENCODING_HT) { + dict *set = o->ptr; + dictIterator *di = dictGetIterator(set); + dictEntry *de; - if (rdbSaveLen(fp,dictSize(set)) == -1) return -1; - while((de = dictNext(di)) != NULL) { - robj *eleobj = dictGetEntryKey(de); + if (rdbSaveLen(fp,dictSize(set)) == -1) return -1; + while((de = dictNext(di)) != NULL) { + robj *eleobj = dictGetEntryKey(de); + if (rdbSaveStringObject(fp,eleobj) == -1) return -1; + } + dictReleaseIterator(di); + } else if (o->encoding == REDIS_ENCODING_INTSET) { + intset *is = o->ptr; + long long llval; + int i = 0; - if (rdbSaveStringObject(fp,eleobj) == -1) return -1; + if (rdbSaveLen(fp,intsetLen(is)) == -1) return -1; + while(intsetGet(is,i++,&llval)) { + if (rdbSaveLongLongAsStringObject(fp,llval) == -1) return -1; + } + } else { + redisPanic("Unknown set encoding"); } - dictReleaseIterator(di); } else if (o->type == REDIS_ZSET) { /* Save a set value */ zset *zs = o->ptr; @@ -5459,12 +5490,37 @@ static void rpoplpushcommand(redisClient *c) { /* ==================================== Sets ================================ */ +/* Factory method to return a set that *can* hold "value". 
When the object has + * an integer-encodable value, an intset will be returned. Otherwise a regular + * hash table. */ +static robj *setTypeCreate(robj *value) { + if (getLongLongFromObject(value,NULL) == REDIS_OK) + return createIntsetObject(); + return createSetObject(); +} + static int setTypeAdd(robj *subject, robj *value) { + long long llval; if (subject->encoding == REDIS_ENCODING_HT) { if (dictAdd(subject->ptr,value,NULL) == DICT_OK) { incrRefCount(value); return 1; } + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + if (getLongLongFromObject(value,&llval) == REDIS_OK) { + uint8_t success; + subject->ptr = intsetAdd(subject->ptr,llval,&success); + if (success) return 1; + } else { + /* Failed to get integer from object, convert to regular set. */ + setTypeConvert(subject,REDIS_ENCODING_HT); + + /* The set *was* an intset and this value is not integer + * encodable, so dictAdd should always work. */ + redisAssert(dictAdd(subject->ptr,value,NULL) == DICT_OK); + incrRefCount(value); + return 1; + } } else { redisPanic("Unknown set encoding"); } @@ -5472,11 +5528,18 @@ static int setTypeAdd(robj *subject, robj *value) { } static int setTypeRemove(robj *subject, robj *value) { + long long llval; if (subject->encoding == REDIS_ENCODING_HT) { if (dictDelete(subject->ptr,value) == DICT_OK) { if (htNeedsResize(subject->ptr)) dictResize(subject->ptr); return 1; } + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + if (getLongLongFromObject(value,&llval) == REDIS_OK) { + uint8_t success; + subject->ptr = intsetRemove(subject->ptr,llval,&success); + if (success) return 1; + } } else { redisPanic("Unknown set encoding"); } @@ -5484,24 +5547,35 @@ static int setTypeRemove(robj *subject, robj *value) { } static int setTypeIsMember(robj *subject, robj *value) { + long long llval; if (subject->encoding == REDIS_ENCODING_HT) { return dictFind((dict*)subject->ptr,value) != NULL; + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + if 
(getLongLongFromObject(value,&llval) == REDIS_OK) { + return intsetFind((intset*)subject->ptr,llval); + } } else { redisPanic("Unknown set encoding"); } + return 0; } /* Structure to hold set iteration abstraction. */ typedef struct { + robj *subject; int encoding; + int ii; /* intset iterator */ dictIterator *di; } setIterator; static setIterator *setTypeInitIterator(robj *subject) { setIterator *si = zmalloc(sizeof(setIterator)); + si->subject = subject; si->encoding = subject->encoding; if (si->encoding == REDIS_ENCODING_HT) { si->di = dictGetIterator(subject->ptr); + } else if (si->encoding == REDIS_ENCODING_INTSET) { + si->ii = 0; } else { redisPanic("Unknown set encoding"); } @@ -5525,6 +5599,10 @@ static robj *setTypeNext(setIterator *si) { ret = dictGetEntryKey(de); incrRefCount(ret); } + } else if (si->encoding == REDIS_ENCODING_INTSET) { + long long llval; + if (intsetGet(si->subject->ptr,si->ii++,&llval)) + ret = createStringObjectFromLongLong(llval); } return ret; } @@ -5538,6 +5616,9 @@ robj *setTypeRandomElement(robj *subject) { dictEntry *de = dictGetRandomKey(subject->ptr); ret = dictGetEntryKey(de); incrRefCount(ret); + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + long long llval = intsetRandom(subject->ptr); + ret = createStringObjectFromLongLong(llval); } else { redisPanic("Unknown set encoding"); } @@ -5547,17 +5628,41 @@ robj *setTypeRandomElement(robj *subject) { static unsigned long setTypeSize(robj *subject) { if (subject->encoding == REDIS_ENCODING_HT) { return dictSize((dict*)subject->ptr); + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + return intsetLen((intset*)subject->ptr); } else { redisPanic("Unknown set encoding"); } } +static void setTypeConvert(robj *subject, int enc) { + setIterator *si; + robj *element; + redisAssert(subject->type == REDIS_SET); + + if (enc == REDIS_ENCODING_HT) { + dict *d = dictCreate(&setDictType,NULL); + + /* setTypeGet returns a robj with incremented refcount */ + si = 
setTypeInitIterator(subject); + while ((element = setTypeNext(si)) != NULL) + redisAssert(dictAdd(d,element,NULL) == DICT_OK); + setTypeReleaseIterator(si); + + subject->encoding = REDIS_ENCODING_HT; + zfree(subject->ptr); + subject->ptr = d; + } else { + redisPanic("Unsupported set conversion"); + } +} + static void saddCommand(redisClient *c) { robj *set; set = lookupKeyWrite(c->db,c->argv[1]); if (set == NULL) { - set = createSetObject(); + set = setTypeCreate(c->argv[2]); dbAdd(c->db,c->argv[1],set); } else { if (set->type != REDIS_SET) { @@ -5616,7 +5721,7 @@ static void smoveCommand(redisClient *c) { server.dirty++; /* Add the element to the destination set */ if (!dstset) { - dstset = createSetObject(); + dstset = setTypeCreate(c->argv[3]); dbAdd(c->db,c->argv[2],dstset); } setTypeAdd(dstset,c->argv[3]); @@ -5724,7 +5829,7 @@ static void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long s } else { /* If we have a target key where to store the resulting set * create this key with an empty set inside */ - dstset = createSetObject(); + dstset = createIntsetObject(); } /* Iterate all the elements of the first (smallest) set, and test @@ -5802,7 +5907,7 @@ static void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, /* We need a temp set object to store our union. 
If the dstkey * is not NULL (that is, we are inside an SUNIONSTORE operation) then * this set object will be the resulting object to set into the target key*/ - dstset = createSetObject(); + dstset = createIntsetObject(); /* Iterate all the elements of all the sets, add every element a single * time to the result set */ diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index 58ea2b5b..c4fd4d76 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -1,119 +1,185 @@ start_server {tags {"set"}} { - test {SADD, SCARD, SISMEMBER, SMEMBERS basics} { - r sadd myset foo - r sadd myset bar - list [r scard myset] [r sismember myset foo] \ - [r sismember myset bar] [r sismember myset bla] \ - [lsort [r smembers myset]] - } {2 1 1 0 {bar foo}} + proc create_set {key entries} { + r del $key + foreach entry $entries { r sadd $key $entry } + } - test {SADD adding the same element multiple times} { - r sadd myset foo - r sadd myset foo - r sadd myset foo - r scard myset - } {2} + test {SADD, SCARD, SISMEMBER, SMEMBERS basics - regular set} { + create_set myset {foo} + assert_encoding hashtable myset + assert_equal 1 [r sadd myset bar] + assert_equal 0 [r sadd myset bar] + assert_equal 2 [r scard myset] + assert_equal 1 [r sismember myset foo] + assert_equal 1 [r sismember myset bar] + assert_equal 0 [r sismember myset bla] + assert_equal {bar foo} [lsort [r smembers myset]] + } + + test {SADD, SCARD, SISMEMBER, SMEMBERS basics - intset} { + create_set myset {17} + assert_encoding intset myset + assert_equal 1 [r sadd myset 16] + assert_equal 0 [r sadd myset 16] + assert_equal 2 [r scard myset] + assert_equal 1 [r sismember myset 16] + assert_equal 1 [r sismember myset 17] + assert_equal 0 [r sismember myset 18] + assert_equal {16 17} [lsort [r smembers myset]] + } test {SADD against non set} { r lpush mylist foo - catch {r sadd mylist bar} err - format $err - } {ERR*kind*} + assert_error ERR*kind* {r sadd mylist bar} + } - test {SREM basics} { - r sadd 
myset ciao - r srem myset foo - lsort [r smembers myset] - } {bar ciao} + test {SREM basics - regular set} { + create_set myset {foo bar ciao} + assert_encoding hashtable myset + assert_equal 0 [r srem myset qux] + assert_equal 1 [r srem myset foo] + assert_equal {bar ciao} [lsort [r smembers myset]] + } - test {Mass SADD and SINTER with two sets} { + test {SREM basics - intset} { + create_set myset {3 4 5} + assert_encoding intset myset + assert_equal 0 [r srem myset 6] + assert_equal 1 [r srem myset 4] + assert_equal {3 5} [lsort [r smembers myset]] + } + + foreach {type} {hashtable intset} { + for {set i 1} {$i <= 5} {incr i} { + r del [format "set%d" $i] + } for {set i 0} {$i < 1000} {incr i} { r sadd set1 $i r sadd set2 [expr $i+995] } - lsort [r sinter set1 set2] - } {995 996 997 998 999} - - test {SUNION with two sets} { - lsort [r sunion set1 set2] - } [lsort -uniq "[r smembers set1] [r smembers set2]"] - - test {SINTERSTORE with two sets} { - r sinterstore setres set1 set2 - lsort [r smembers setres] - } {995 996 997 998 999} - - test {SINTERSTORE with two sets, after a DEBUG RELOAD} { - r debug reload - r sinterstore setres set1 set2 - lsort [r smembers setres] - } {995 996 997 998 999} - - test {SUNIONSTORE with two sets} { - r sunionstore setres set1 set2 - lsort [r smembers setres] - } [lsort -uniq "[r smembers set1] [r smembers set2]"] - - test {SUNIONSTORE against non existing keys} { - r set setres xxx - list [r sunionstore setres foo111 bar222] [r exists xxx] - } {0 0} - - test {SINTER against three sets} { - r sadd set3 999 - r sadd set3 995 - r sadd set3 1000 - r sadd set3 2000 - lsort [r sinter set1 set2 set3] - } {995 999} - - test {SINTERSTORE with three sets} { - r sinterstore setres set1 set2 set3 - lsort [r smembers setres] - } {995 999} - - test {SUNION with non existing keys} { - lsort [r sunion nokey1 set1 set2 nokey2] - } [lsort -uniq "[r smembers set1] [r smembers set2]"] - - test {SDIFF with two sets} { + foreach i {999 995 1000 2000} 
{ + r sadd set3 $i + } for {set i 5} {$i < 1000} {incr i} { r sadd set4 $i } - lsort [r sdiff set1 set4] - } {0 1 2 3 4} - - test {SDIFF with three sets} { r sadd set5 0 - lsort [r sdiff set1 set4 set5] - } {1 2 3 4} - test {SDIFFSTORE with three sets} { - r sdiffstore sres set1 set4 set5 - lsort [r smembers sres] - } {1 2 3 4} - - test {SPOP basics} { - r del myset - r sadd myset 1 - r sadd myset 2 - r sadd myset 3 - list [lsort [list [r spop myset] [r spop myset] [r spop myset]]] [r scard myset] - } {{1 2 3} 0} - - test {SRANDMEMBER} { - r del myset - r sadd myset a - r sadd myset b - r sadd myset c - unset -nocomplain myset - array set myset {} - for {set i 0} {$i < 100} {incr i} { - set myset([r srandmember myset]) 1 + # it is possible that a hashtable encoded only contains integers, + # because it is converted from an intset to a hashtable when a + # non-integer element is added and then removed. + if {$type eq "hashtable"} { + for {set i 1} {$i <= 5} {incr i} { + r sadd [format "set%d" $i] foo + r srem [format "set%d" $i] foo + } } - lsort [array names myset] - } {a b c} - + + test "Generated sets must be encoded as $type" { + for {set i 1} {$i <= 5} {incr i} { + assert_encoding $type [format "set%d" $i] + } + } + + test "SINTER with two sets - $type" { + assert_equal {995 996 997 998 999} [lsort [r sinter set1 set2]] + } + + test "SINTERSTORE with two sets - $type" { + r sinterstore setres set1 set2 + assert_encoding intset setres + assert_equal {995 996 997 998 999} [lsort [r smembers setres]] + } + + test "SINTERSTORE with two sets, after a DEBUG RELOAD - $type" { + r debug reload + r sinterstore setres set1 set2 + assert_encoding intset setres + assert_equal {995 996 997 998 999} [lsort [r smembers setres]] + } + + test "SUNION with two sets - $type" { + set expected [lsort -uniq "[r smembers set1] [r smembers set2]"] + assert_equal $expected [lsort [r sunion set1 set2]] + } + + test "SUNIONSTORE with two sets - $type" { + r sunionstore setres set1 set2 + 
assert_encoding intset setres + set expected [lsort -uniq "[r smembers set1] [r smembers set2]"] + assert_equal $expected [lsort [r smembers setres]] + } + + test "SINTER against three sets - $type" { + assert_equal {995 999} [lsort [r sinter set1 set2 set3]] + } + + test "SINTERSTORE with three sets - $type" { + r sinterstore setres set1 set2 set3 + assert_equal {995 999} [r smembers setres] + } + + test "SUNION with non existing keys - $type" { + set expected [lsort -uniq "[r smembers set1] [r smembers set2]"] + assert_equal $expected [lsort [r sunion nokey1 set1 set2 nokey2]] + } + + test "SDIFF with two sets - $type" { + assert_equal {0 1 2 3 4} [lsort [r sdiff set1 set4]] + } + + test "SDIFF with three sets - $type" { + assert_equal {1 2 3 4} [lsort [r sdiff set1 set4 set5]] + } + + test "SDIFFSTORE with three sets - $type" { + r sdiffstore setres set1 set4 set5 + assert_encoding intset setres + assert_equal {1 2 3 4} [lsort [r smembers setres]] + } + } + + test "SINTER against non-set should throw error" { + r set key1 x + assert_error "ERR*wrong kind*" {r sinter key1 noset} + } + + test "SUNION against non-set should throw error" { + r set key1 x + assert_error "ERR*wrong kind*" {r sunion key1 noset} + } + + test "SINTERSTORE against non existing keys should delete dstkey" { + r set setres xxx + assert_equal 0 [r sinterstore setres foo111 bar222] + assert_equal 0 [r exists setres] + } + + test "SUNIONSTORE against non existing keys should delete dstkey" { + r set setres xxx + assert_equal 0 [r sunionstore setres foo111 bar222] + assert_equal 0 [r exists setres] + } + + foreach {type contents} {hashtable {a b c} intset {1 2 3}} { + test "SPOP basics - $type" { + create_set myset $contents + assert_encoding $type myset + assert_equal $contents [lsort [list [r spop myset] [r spop myset] [r spop myset]]] + assert_equal 0 [r scard myset] + } + + test "SRANDMEMBER - $type" { + create_set myset $contents + unset -nocomplain myset + array set myset {} + for {set i 
0} {$i < 100} {incr i} { + set myset([r srandmember myset]) 1 + } + assert_equal $contents [lsort [array names myset]] + } + } + test {SMOVE basics} { r sadd myset1 a r sadd myset1 b From b978abbf022b1810031b5aaa171ef0899f7fe77d Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sun, 13 Jun 2010 11:16:18 +0200 Subject: [PATCH 006/139] small refactor of SMOVE and tests for SMOVE on sets with different encoding --- redis.c | 51 +++++++++++++---------- tests/unit/type/set.tcl | 89 ++++++++++++++++++++++++++++------------- 2 files changed, 92 insertions(+), 48 deletions(-) diff --git a/redis.c b/redis.c index c1df3293..51ca6d2a 100644 --- a/redis.c +++ b/redis.c @@ -5694,37 +5694,46 @@ static void sremCommand(redisClient *c) { } static void smoveCommand(redisClient *c) { - robj *srcset, *dstset; - + robj *srcset, *dstset, *ele; srcset = lookupKeyWrite(c->db,c->argv[1]); dstset = lookupKeyWrite(c->db,c->argv[2]); + ele = c->argv[3]; - /* If the source key does not exist return 0, if it's of the wrong type - * raise an error */ - if (srcset == NULL || srcset->type != REDIS_SET) { - addReply(c, srcset ? shared.wrongtypeerr : shared.czero); - return; - } - /* Error if the destination key is not a set as well */ - if (dstset && dstset->type != REDIS_SET) { - addReply(c,shared.wrongtypeerr); - return; - } - /* Remove the element from the source set */ - if (!setTypeRemove(srcset,c->argv[3])) { - /* Key not found in the src set! return zero */ + /* If the source key does not exist return 0 */ + if (srcset == NULL) { addReply(c,shared.czero); return; } - if (setTypeSize(srcset) == 0 && srcset != dstset) - dbDelete(c->db,c->argv[1]); + + /* If the source key has the wrong type, or the destination key + * is set and has the wrong type, return with an error. 
*/ + if (checkType(c,srcset,REDIS_SET) || + (dstset && checkType(c,dstset,REDIS_SET))) return; + + /* If srcset and dstset are equal, SMOVE is a no-op */ + if (srcset == dstset) { + addReply(c,shared.cone); + return; + } + + /* If the element cannot be removed from the src set, return 0. */ + if (!setTypeRemove(srcset,ele)) { + addReply(c,shared.czero); + return; + } + + /* Remove the src set from the database when empty */ + if (setTypeSize(srcset) == 0) dbDelete(c->db,c->argv[1]); server.dirty++; - /* Add the element to the destination set */ + + /* Create the destination set when it doesn't exist */ if (!dstset) { - dstset = setTypeCreate(c->argv[3]); + dstset = setTypeCreate(ele); dbAdd(c->db,c->argv[2],dstset); } - setTypeAdd(dstset,c->argv[3]); + + /* An extra key has changed when ele was successfully added to dstset */ + if (setTypeAdd(dstset,ele)) server.dirty++; addReply(c,shared.cone); } diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index c4fd4d76..5b8d961e 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -180,38 +180,73 @@ start_server {tags {"set"}} { } } - test {SMOVE basics} { - r sadd myset1 a - r sadd myset1 b - r sadd myset1 c - r sadd myset2 x - r sadd myset2 y - r sadd myset2 z - r smove myset1 myset2 a - list [lsort [r smembers myset2]] [lsort [r smembers myset1]] - } {{a x y z} {b c}} + proc setup_move {} { + r del myset3 myset4 + create_set myset1 {1 a b} + create_set myset2 {2 3 4} + assert_encoding hashtable myset1 + assert_encoding intset myset2 + } - test {SMOVE non existing key} { - list [r smove myset1 myset2 foo] [lsort [r smembers myset2]] [lsort [r smembers myset1]] - } {0 {a x y z} {b c}} + test "SMOVE basics - from regular set to intset" { + # move a non-integer element to an intset should convert encoding + setup_move + assert_equal 1 [r smove myset1 myset2 a] + assert_equal {1 b} [lsort [r smembers myset1]] + assert_equal {2 3 4 a} [lsort [r smembers myset2]] + assert_encoding hashtable myset2 
- test {SMOVE non existing src set} { - list [r smove noset myset2 foo] [lsort [r smembers myset2]] - } {0 {a x y z}} + # move an integer element should not convert the encoding + setup_move + assert_equal 1 [r smove myset1 myset2 1] + assert_equal {a b} [lsort [r smembers myset1]] + assert_equal {1 2 3 4} [lsort [r smembers myset2]] + assert_encoding intset myset2 + } - test {SMOVE non existing dst set} { - list [r smove myset2 myset3 y] [lsort [r smembers myset2]] [lsort [r smembers myset3]] - } {1 {a x z} y} + test "SMOVE basics - from intset to regular set" { + setup_move + assert_equal 1 [r smove myset2 myset1 2] + assert_equal {1 2 a b} [lsort [r smembers myset1]] + assert_equal {3 4} [lsort [r smembers myset2]] + } - test {SMOVE wrong src key type} { + test "SMOVE non existing key" { + setup_move + assert_equal 0 [r smove myset1 myset2 foo] + assert_equal {1 a b} [lsort [r smembers myset1]] + assert_equal {2 3 4} [lsort [r smembers myset2]] + } + + test "SMOVE non existing src set" { + setup_move + assert_equal 0 [r smove noset myset2 foo] + assert_equal {2 3 4} [lsort [r smembers myset2]] + } + + test "SMOVE from regular set to non existing destination set" { + setup_move + assert_equal 1 [r smove myset1 myset3 a] + assert_equal {1 b} [lsort [r smembers myset1]] + assert_equal {a} [lsort [r smembers myset3]] + assert_encoding hashtable myset3 + } + + test "SMOVE from intset to non existing destination set" { + setup_move + assert_equal 1 [r smove myset2 myset3 2] + assert_equal {3 4} [lsort [r smembers myset2]] + assert_equal {2} [lsort [r smembers myset3]] + assert_encoding intset myset3 + } + + test "SMOVE wrong src key type" { r set x 10 - catch {r smove x myset2 foo} err - format $err - } {ERR*} + assert_error "ERR*wrong kind*" {r smove x myset2 foo} + } - test {SMOVE wrong dst key type} { + test "SMOVE wrong dst key type" { r set x 10 - catch {r smove myset2 x foo} err - format $err - } {ERR*} + assert_error "ERR*wrong kind*" {r smove myset2 x foo} + } 
} From 70ff3511bc427fe05df22e1aba4071910f33e289 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sun, 13 Jun 2010 15:21:25 +0200 Subject: [PATCH 007/139] configure maximum number of entries in an intset --- redis.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/redis.c b/redis.c index 51ca6d2a..0cf768ac 100644 --- a/redis.c +++ b/redis.c @@ -244,6 +244,7 @@ static char* strencoding[] = { #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024 #define REDIS_LIST_MAX_ZIPLIST_VALUE 32 +#define REDIS_SET_MAX_INTSET_ENTRIES 4096 /* We can print the stacktrace, so our assert is defined this way: */ #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) @@ -434,6 +435,7 @@ struct redisServer { size_t hash_max_zipmap_value; size_t list_max_ziplist_entries; size_t list_max_ziplist_value; + size_t set_max_intset_entries; /* Virtual memory state */ FILE *vm_fp; int vm_fd; @@ -1765,6 +1767,7 @@ static void initServerConfig() { server.hash_max_zipmap_value = REDIS_HASH_MAX_ZIPMAP_VALUE; server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES; server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE; + server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES; server.shutdown_asap = 0; resetServerSaveParams(); @@ -2047,6 +2050,8 @@ static void loadServerConfig(char *filename) { server.list_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2){ server.list_max_ziplist_value = memtoll(argv[1], NULL); + } else if (!strcasecmp(argv[0],"set-max-intset-entries") && argc == 2){ + server.set_max_intset_entries = memtoll(argv[1], NULL); } else { err = "Bad directive or wrong number of arguments"; goto loaderr; } @@ -5508,9 +5513,15 @@ static int setTypeAdd(robj *subject, robj *value) { } } else if (subject->encoding == REDIS_ENCODING_INTSET) { if (getLongLongFromObject(value,&llval) == REDIS_OK) { - uint8_t 
success; + uint8_t success = 0; subject->ptr = intsetAdd(subject->ptr,llval,&success); - if (success) return 1; + if (success) { + /* Convert to regular set when the intset contains + * too many entries. */ + if (intsetLen(subject->ptr) > server.set_max_intset_entries) + setTypeConvert(subject,REDIS_ENCODING_HT); + return 1; + } } else { /* Failed to get integer from object, convert to regular set. */ setTypeConvert(subject,REDIS_ENCODING_HT); From ab37269c38a1e60ec03f3d537b1c7ed33f761225 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sun, 13 Jun 2010 21:23:43 +0200 Subject: [PATCH 008/139] use max number of intset entries in tests and make SUNION/SINTER/SDIFF tests use less entries --- tests/unit/type/set.tcl | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index 5b8d961e..f9e7f4bd 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -1,4 +1,9 @@ -start_server {tags {"set"}} { +start_server { + tags {"set"} + overrides { + "set-max-intset-entries" 512 + } +} { proc create_set {key entries} { r del $key foreach entry $entries { r sadd $key $entry } @@ -33,6 +38,21 @@ start_server {tags {"set"}} { assert_error ERR*kind* {r sadd mylist bar} } + test "SADD a non-integer against an intset" { + create_set myset {1 2 3} + assert_encoding intset myset + assert_equal 1 [r sadd myset a] + assert_encoding hashtable myset + } + + test "SADD overflows the maximum allowed integers in an intset" { + r del myset + for {set i 0} {$i < 512} {incr i} { r sadd myset $i } + assert_encoding intset myset + assert_equal 1 [r sadd myset 512] + assert_encoding hashtable myset + } + test {SREM basics - regular set} { create_set myset {foo bar ciao} assert_encoding hashtable myset @@ -53,14 +73,14 @@ start_server {tags {"set"}} { for {set i 1} {$i <= 5} {incr i} { r del [format "set%d" $i] } - for {set i 0} {$i < 1000} {incr i} { + for {set i 0} {$i < 200} 
{incr i} { r sadd set1 $i - r sadd set2 [expr $i+995] + r sadd set2 [expr $i+195] } - foreach i {999 995 1000 2000} { + foreach i {199 195 1000 2000} { r sadd set3 $i } - for {set i 5} {$i < 1000} {incr i} { + for {set i 5} {$i < 200} {incr i} { r sadd set4 $i } r sadd set5 0 @@ -82,20 +102,20 @@ start_server {tags {"set"}} { } test "SINTER with two sets - $type" { - assert_equal {995 996 997 998 999} [lsort [r sinter set1 set2]] + assert_equal {195 196 197 198 199} [lsort [r sinter set1 set2]] } test "SINTERSTORE with two sets - $type" { r sinterstore setres set1 set2 assert_encoding intset setres - assert_equal {995 996 997 998 999} [lsort [r smembers setres]] + assert_equal {195 196 197 198 199} [lsort [r smembers setres]] } test "SINTERSTORE with two sets, after a DEBUG RELOAD - $type" { r debug reload r sinterstore setres set1 set2 assert_encoding intset setres - assert_equal {995 996 997 998 999} [lsort [r smembers setres]] + assert_equal {195 196 197 198 199} [lsort [r smembers setres]] } test "SUNION with two sets - $type" { @@ -111,12 +131,12 @@ start_server {tags {"set"}} { } test "SINTER against three sets - $type" { - assert_equal {995 999} [lsort [r sinter set1 set2 set3]] + assert_equal {195 199} [lsort [r sinter set1 set2 set3]] } test "SINTERSTORE with three sets - $type" { r sinterstore setres set1 set2 set3 - assert_equal {995 999} [r smembers setres] + assert_equal {195 199} [r smembers setres] } test "SUNION with non existing keys - $type" { From 273f6169301eba5461d90f07ec683ae06572e931 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sun, 13 Jun 2010 21:42:04 +0200 Subject: [PATCH 009/139] make sure sets have the right encoding when loaded from rdb --- redis.c | 34 ++++++++++++++++++++++++++++------ tests/unit/type/set.tcl | 15 +++++++++++++++ 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/redis.c b/redis.c index 0cf768ac..0d6648ee 100644 --- a/redis.c +++ b/redis.c @@ -4234,16 +4234,38 @@ static robj *rdbLoadObject(int 
type, FILE *fp) { } else if (type == REDIS_SET) { /* Read list/set value */ if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL; - o = createSetObject(); - /* It's faster to expand the dict to the right size asap in order - * to avoid rehashing */ - if (len > DICT_HT_INITIAL_SIZE) - dictExpand(o->ptr,len); + + /* Use a regular set when there are too many entries. */ + if (len > server.set_max_intset_entries) { + o = createSetObject(); + /* It's faster to expand the dict to the right size asap in order + * to avoid rehashing */ + if (len > DICT_HT_INITIAL_SIZE) + dictExpand(o->ptr,len); + } else { + o = createIntsetObject(); + } + /* Load every single element of the list/set */ while(len--) { + long long llval; if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL; ele = tryObjectEncoding(ele); - dictAdd((dict*)o->ptr,ele,NULL); + + if (o->encoding == REDIS_ENCODING_INTSET) { + /* Fetch integer value from element */ + if (getLongLongFromObject(ele,&llval) == REDIS_OK) { + o->ptr = intsetAdd(o->ptr,llval,NULL); + } else { + setTypeConvert(o,REDIS_ENCODING_HT); + } + } + + /* This will also be called when the set was just converted + * to regular hashtable encoded set */ + if (o->encoding == REDIS_ENCODING_HT) { + dictAdd((dict*)o->ptr,ele,NULL); + } } } else if (type == REDIS_ZSET) { /* Read list/set value */ diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index f9e7f4bd..a1b655ef 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -53,6 +53,21 @@ start_server { assert_encoding hashtable myset } + test "Set encoding after DEBUG RELOAD" { + r del myintset myhashset mylargeintset + for {set i 0} {$i < 100} {incr i} { r sadd myintset $i } + for {set i 0} {$i < 1280} {incr i} { r sadd mylargeintset $i } + for {set i 0} {$i < 256} {incr i} { r sadd myhashset [format "i%03d" $i] } + assert_encoding intset myintset + assert_encoding hashtable mylargeintset + assert_encoding hashtable myhashset + + r debug reload + 
assert_encoding intset myintset + assert_encoding hashtable mylargeintset + assert_encoding hashtable myhashset + } + test {SREM basics - regular set} { create_set myset {foo bar ciao} assert_encoding hashtable myset From 400aea2b1343232e45a0b7a9435a69d0fe8a0956 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sun, 13 Jun 2010 21:52:07 +0200 Subject: [PATCH 010/139] expand the dictionary of the target set to the right size when converting from intset --- redis.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/redis.c b/redis.c index 0d6648ee..28b1466c 100644 --- a/redis.c +++ b/redis.c @@ -4193,6 +4193,7 @@ static int rdbLoadDoubleValue(FILE *fp, double *val) { static robj *rdbLoadObject(int type, FILE *fp) { robj *o, *ele, *dec; size_t len; + unsigned int i; redisLog(REDIS_DEBUG,"LOADING OBJECT %d (at %d)\n",type,ftell(fp)); if (type == REDIS_STRING) { @@ -4247,7 +4248,7 @@ static robj *rdbLoadObject(int type, FILE *fp) { } /* Load every single element of the list/set */ - while(len--) { + for (i = 0; i < len; i++) { long long llval; if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL; ele = tryObjectEncoding(ele); @@ -4258,6 +4259,7 @@ static robj *rdbLoadObject(int type, FILE *fp) { o->ptr = intsetAdd(o->ptr,llval,NULL); } else { setTypeConvert(o,REDIS_ENCODING_HT); + dictExpand(o->ptr,len); } } @@ -5668,6 +5670,9 @@ static unsigned long setTypeSize(robj *subject) { } } +/* Convert the set to specified encoding. The resulting dict (when converting + * to a hashtable) is presized to hold the number of elements in the original + * set. 
*/ static void setTypeConvert(robj *subject, int enc) { setIterator *si; robj *element; @@ -5675,6 +5680,8 @@ static void setTypeConvert(robj *subject, int enc) { if (enc == REDIS_ENCODING_HT) { dict *d = dictCreate(&setDictType,NULL); + /* Presize the dict to avoid rehashing */ + dictExpand(d,intsetLen(subject->ptr)); /* setTypeGet returns a robj with incremented refcount */ si = setTypeInitIterator(subject); From 2767f1c0c6983c75689dc67638546196c5dddb26 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 2 Jul 2010 20:42:20 +0200 Subject: [PATCH 011/139] fix aof and digest code to work with dual set encoding --- src/aof.c | 34 ++++++++++++++++++++++------------ src/debug.c | 15 ++++++--------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/aof.c b/src/aof.c index c92798c5..a2e732d2 100644 --- a/src/aof.c +++ b/src/aof.c @@ -461,20 +461,30 @@ int rewriteAppendOnlyFile(char *filename) { redisPanic("Unknown list encoding"); } } else if (o->type == REDIS_SET) { + char cmd[]="*3\r\n$4\r\nSADD\r\n"; + /* Emit the SADDs needed to rebuild the set */ - dict *set = o->ptr; - dictIterator *di = dictGetIterator(set); - dictEntry *de; - - while((de = dictNext(di)) != NULL) { - char cmd[]="*3\r\n$4\r\nSADD\r\n"; - robj *eleobj = dictGetEntryKey(de); - - if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; - if (fwriteBulkObject(fp,&key) == 0) goto werr; - if (fwriteBulkObject(fp,eleobj) == 0) goto werr; + if (o->encoding == REDIS_ENCODING_INTSET) { + int ii = 0; + long long llval; + while(intsetGet(o->ptr,ii++,&llval)) { + if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; + if (fwriteBulkObject(fp,&key) == 0) goto werr; + if (fwriteBulkLongLong(fp,llval) == 0) goto werr; + } + } else if (o->encoding == REDIS_ENCODING_HT) { + dictIterator *di = dictGetIterator(o->ptr); + dictEntry *de; + while((de = dictNext(di)) != NULL) { + robj *eleobj = dictGetEntryKey(de); + if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; + if (fwriteBulkObject(fp,&key) == 
0) goto werr; + if (fwriteBulkObject(fp,eleobj) == 0) goto werr; + } + dictReleaseIterator(di); + } else { + redisPanic("Unknown set encoding"); } - dictReleaseIterator(di); } else if (o->type == REDIS_ZSET) { /* Emit the ZADDs needed to rebuild the sorted set */ zset *zs = o->ptr; diff --git a/src/debug.c b/src/debug.c index ba183d72..f58fcbae 100644 --- a/src/debug.c +++ b/src/debug.c @@ -119,16 +119,13 @@ void computeDatasetDigest(unsigned char *final) { } listTypeReleaseIterator(li); } else if (o->type == REDIS_SET) { - dict *set = o->ptr; - dictIterator *di = dictGetIterator(set); - dictEntry *de; - - while((de = dictNext(di)) != NULL) { - robj *eleobj = dictGetEntryKey(de); - - xorObjectDigest(digest,eleobj); + setIterator *si = setTypeInitIterator(o); + robj *ele; + while((ele = setTypeNext(si)) != NULL) { + xorObjectDigest(digest,ele); + decrRefCount(ele); } - dictReleaseIterator(di); + setTypeReleaseIterator(si); } else if (o->type == REDIS_ZSET) { zset *zs = o->ptr; dictIterator *di = dictGetIterator(zs->dict); From 68254919284ec958225e1bc5fb2951ef096c92d1 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sun, 1 Aug 2010 11:20:26 +0200 Subject: [PATCH 012/139] Fix assertion function on value encoding --- tests/support/test.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/support/test.tcl b/tests/support/test.tcl index 298e4c77..93f64928 100644 --- a/tests/support/test.tcl +++ b/tests/support/test.tcl @@ -36,8 +36,8 @@ proc assert_encoding {enc key} { # Swapped out values don't have an encoding, so make sure that # the value is swapped in before checking the encoding. 
set dbg [r debug object $key] - while {[string match "* swapped:*" $dbg]} { - [r debug swapin $key] + while {[string match "* swapped at:*" $dbg]} { + r debug swapin $key set dbg [r debug object $key] } assert_match "* encoding:$enc *" $dbg From bcf2995c987acea7f5485ec0e3717a29a7e98457 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 2 Aug 2010 18:13:39 +0200 Subject: [PATCH 013/139] support for write operations against expiring keys, by master-controlled expiring in replication and AOF synthesizing DEL operations --- src/db.c | 51 +++++++++++++++++++++++-------- src/redis.c | 81 ++++++++++++++++++++++++++++---------------------- src/redis.h | 2 +- src/t_string.c | 1 - 4 files changed, 86 insertions(+), 49 deletions(-) diff --git a/src/db.c b/src/db.c index 958a9f6b..d8a5d0b2 100644 --- a/src/db.c +++ b/src/db.c @@ -45,7 +45,7 @@ robj *lookupKeyRead(redisDb *db, robj *key) { } robj *lookupKeyWrite(redisDb *db, robj *key) { - deleteIfVolatile(db,key); + expireIfNeeded(db,key); return lookupKey(db,key); } @@ -321,7 +321,6 @@ void renameGenericCommand(redisClient *c, int nx) { return; incrRefCount(o); - deleteIfVolatile(c->db,c->argv[2]); if (dbAdd(c->db,c->argv[2],o) == REDIS_ERR) { if (nx) { decrRefCount(o); @@ -375,7 +374,6 @@ void moveCommand(redisClient *c) { } /* Try to add the element to the target DB */ - deleteIfVolatile(dst,c->argv[1]); if (dbAdd(dst,c->argv[1],o) == REDIS_ERR) { addReply(c,shared.czero); return; @@ -430,8 +428,45 @@ time_t getExpire(redisDb *db, robj *key) { return (time_t) dictGetEntryVal(de); } +/* Propagate expires into slaves and the AOF file. + * When a key expires in the master, a DEL operation for this key is sent + * to all the slaves and the AOF file if enabled. + * + * This way the key expiry is centralized in one place, and since both + * AOF and the master->slave link guarantee operation ordering, everything + * will be consistent even if we allow write operations against expiring + * keys. 
*/ +void propagateExpire(redisDb *db, robj *key) { + struct redisCommand *cmd; + robj *argv[2]; + + cmd = lookupCommand("del"); + argv[0] = createStringObject("DEL",3); + argv[1] = key; + incrRefCount(key); + + if (server.appendonly) + feedAppendOnlyFile(cmd,db->id,argv,2); + if (listLength(server.slaves)) + replicationFeedSlaves(server.slaves,db->id,argv,2); + + decrRefCount(key); +} + int expireIfNeeded(redisDb *db, robj *key) { time_t when = getExpire(db,key); + + /* If we are running in the context of a slave, return ASAP: + * the slave key expiration is controlled by the master that will + * send us synthesized DEL operations for expired keys. + * + * Still we try to return the right information to the caller, + * that is, 0 if we think the key should be still valid, 1 if + * we think the key is expired at this time. */ + if (server.masterhost != NULL) { + return time(NULL) > when; + } + if (when < 0) return 0; /* Return when this key has not expired */ @@ -440,15 +475,7 @@ int expireIfNeeded(redisDb *db, robj *key) { /* Delete the key */ server.stat_expiredkeys++; server.dirty++; - return dbDelete(db,key); -} - -int deleteIfVolatile(redisDb *db, robj *key) { - if (getExpire(db,key) < 0) return 0; - - /* Delete the key */ - server.stat_expiredkeys++; - server.dirty++; + propagateExpire(db,key); return dbDelete(db,key); } diff --git a/src/redis.c b/src/redis.c index c8b1c781..27ade8b1 100644 --- a/src/redis.c +++ b/src/redis.c @@ -435,6 +435,48 @@ void updateDictResizePolicy(void) { /* ======================= Cron: called every 100 ms ======================== */ +/* Try to expire a few timed out keys. The algorithm used is adaptive and + * will use few CPU cycles if there are few expiring keys, otherwise + * it will get more aggressive to avoid that too much memory is used by + * keys that can be removed from the keyspace. 
*/ +void activeExpireCycle(void) { + int j; + + for (j = 0; j < server.dbnum; j++) { + int expired; + redisDb *db = server.db+j; + + /* Continue to expire if at the end of the cycle more than 25% + * of the keys were expired. */ + do { + long num = dictSize(db->expires); + time_t now = time(NULL); + + expired = 0; + if (num > REDIS_EXPIRELOOKUPS_PER_CRON) + num = REDIS_EXPIRELOOKUPS_PER_CRON; + while (num--) { + dictEntry *de; + time_t t; + + if ((de = dictGetRandomKey(db->expires)) == NULL) break; + t = (time_t) dictGetEntryVal(de); + if (now > t) { + sds key = dictGetEntryKey(de); + robj *keyobj = createStringObject(key,sdslen(key)); + + propagateExpire(db,keyobj); + dbDelete(db,keyobj); + decrRefCount(keyobj); + expired++; + server.stat_expiredkeys++; + } + } + } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4); + } +} + + int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { int j, loops = server.cronloops++; REDIS_NOTUSED(eventLoop); @@ -533,41 +575,10 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { } } - /* Try to expire a few timed out keys. The algorithm used is adaptive and - * will use few CPU cycles if there are few expiring keys, otherwise - * it will get more aggressive to avoid that too much memory is used by - * keys that can be removed from the keyspace. */ - for (j = 0; j < server.dbnum; j++) { - int expired; - redisDb *db = server.db+j; - - /* Continue to expire if at the end of the cycle more than 25% - * of the keys were expired. 
*/ - do { - long num = dictSize(db->expires); - time_t now = time(NULL); - - expired = 0; - if (num > REDIS_EXPIRELOOKUPS_PER_CRON) - num = REDIS_EXPIRELOOKUPS_PER_CRON; - while (num--) { - dictEntry *de; - time_t t; - - if ((de = dictGetRandomKey(db->expires)) == NULL) break; - t = (time_t) dictGetEntryVal(de); - if (now > t) { - sds key = dictGetEntryKey(de); - robj *keyobj = createStringObject(key,sdslen(key)); - - dbDelete(db,keyobj); - decrRefCount(keyobj); - expired++; - server.stat_expiredkeys++; - } - } - } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4); - } + /* Expire a few keys per cycle, only if this is a master. + * On slaves we wait for DEL operations synthesized by the master + * in order to guarantee a strict consistency. */ + if (server.masterhost == NULL) activeExpireCycle(); /* Swap a few keys on disk if we are over the memory limit and VM * is enbled. Try to free objects from the free list first. */ diff --git a/src/redis.h b/src/redis.h index fb051f8e..27520c19 100644 --- a/src/redis.h +++ b/src/redis.h @@ -752,8 +752,8 @@ void resetServerSaveParams(); /* db.c -- Keyspace access API */ int removeExpire(redisDb *db, robj *key); +void propagateExpire(redisDb *db, robj *key); int expireIfNeeded(redisDb *db, robj *key); -int deleteIfVolatile(redisDb *db, robj *key); time_t getExpire(redisDb *db, robj *key); int setExpire(redisDb *db, robj *key, time_t when); robj *lookupKey(redisDb *db, robj *key); diff --git a/src/t_string.c b/src/t_string.c index f55595c2..3b8a39bb 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -17,7 +17,6 @@ void setGenericCommand(redisClient *c, int nx, robj *key, robj *val, robj *expir } } - if (nx) deleteIfVolatile(c->db,key); retval = dbAdd(c->db,key,val); if (retval == REDIS_ERR) { if (!nx) { From c25a5d3b1062f3398a96a76ecd27c6f3a77a446e Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 2 Aug 2010 21:37:39 +0200 Subject: [PATCH 014/139] memory leak removed from expire propagation code --- src/db.c | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index d8a5d0b2..6ac2b0d7 100644 --- a/src/db.c +++ b/src/db.c @@ -450,7 +450,8 @@ void propagateExpire(redisDb *db, robj *key) { if (listLength(server.slaves)) replicationFeedSlaves(server.slaves,db->id,argv,2); - decrRefCount(key); + decrRefCount(argv[0]); + decrRefCount(argv[1]); } int expireIfNeeded(redisDb *db, robj *key) { From 0cf5b7b57cde8b699198a866b04feca9f5394d03 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 3 Aug 2010 12:26:30 +0200 Subject: [PATCH 015/139] allow to set a new EXPIRE of an existing volatile key --- src/db.c | 22 ++++++++-------------- src/redis.h | 2 +- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/db.c b/src/db.c index 6ac2b0d7..5acda3d5 100644 --- a/src/db.c +++ b/src/db.c @@ -401,16 +401,13 @@ int removeExpire(redisDb *db, robj *key) { } } -int setExpire(redisDb *db, robj *key, time_t when) { +void setExpire(redisDb *db, robj *key, time_t when) { dictEntry *de; /* Reuse the sds from the main dict in the expire dict */ - redisAssert((de = dictFind(db->dict,key->ptr)) != NULL); - if (dictAdd(db->expires,dictGetEntryKey(de),(void*)when) == DICT_ERR) { - return 0; - } else { - return 1; - } + de = dictFind(db->dict,key->ptr); + redisAssert(de != NULL); + dictReplace(db->expires,dictGetEntryKey(de),(void*)when); } /* Return the expire time of the specified key, or -1 if no expire @@ -504,13 +501,10 @@ void expireGenericCommand(redisClient *c, robj *key, robj *param, long offset) { return; } else { time_t when = time(NULL)+seconds; - if (setExpire(c->db,key,when)) { - addReply(c,shared.cone); - touchWatchedKey(c->db,key); - server.dirty++; - } else { - addReply(c,shared.czero); - } + setExpire(c->db,key,when); + addReply(c,shared.cone); + touchWatchedKey(c->db,key); + server.dirty++; return; } } diff --git a/src/redis.h b/src/redis.h index 27520c19..c211cfb5 100644 --- a/src/redis.h +++ b/src/redis.h @@ -755,7 +755,7 @@ int removeExpire(redisDb 
*db, robj *key); void propagateExpire(redisDb *db, robj *key); int expireIfNeeded(redisDb *db, robj *key); time_t getExpire(redisDb *db, robj *key); -int setExpire(redisDb *db, robj *key, time_t when); +void setExpire(redisDb *db, robj *key, time_t when); robj *lookupKey(redisDb *db, robj *key); robj *lookupKeyRead(redisDb *db, robj *key); robj *lookupKeyWrite(redisDb *db, robj *key); From 2c572622fb99f32328de58f815953f17d4ad0e4d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 3 Aug 2010 13:08:32 +0200 Subject: [PATCH 016/139] no longer passing tests due to the new write-on-volatile semantics modified/removed --- tests/unit/basic.tcl | 14 +++----------- tests/unit/expire.tcl | 9 +++++---- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/tests/unit/basic.tcl b/tests/unit/basic.tcl index f888cabc..a8f7feb0 100644 --- a/tests/unit/basic.tcl +++ b/tests/unit/basic.tcl @@ -148,12 +148,11 @@ start_server {tags {"basic"}} { r get novar2 } {foobared} - test {SETNX will overwrite EXPIREing key} { + test {SETNX against volatile key} { r set x 10 r expire x 10000 - r setnx x 20 - r get x - } {20} + list [r setnx x 20] [r get x] + } {0 10} test {EXISTS} { set res {} @@ -362,13 +361,6 @@ start_server {tags {"basic"}} { list [r msetnx x1 xxx y2 yyy] [r get x1] [r get y2] } {1 xxx yyy} - test {MSETNX should remove all the volatile keys even on failure} { - r mset x 1 y 2 z 3 - r expire y 10000 - r expire z 10000 - list [r msetnx x A y B z C] [r mget x y z] - } {0 {1 {} {}}} - test {STRLEN against non existing key} { r strlen notakey } {0} diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index b80975b6..5de907ab 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -1,12 +1,13 @@ start_server {tags {"expire"}} { - test {EXPIRE - don't set timeouts multiple times} { + test {EXPIRE - set timeouts multiple times} { r set x foobar set v1 [r expire x 5] set v2 [r ttl x] set v3 [r expire x 10] set v4 [r ttl x] + r expire x 4 list $v1 $v2 $v3 $v4 - } 
{1 5 0 5} + } {1 5 1 10} test {EXPIRE - It should be still possible to read 'x'} { r get x @@ -19,13 +20,13 @@ start_server {tags {"expire"}} { } {{} 0} } - test {EXPIRE - Delete on write policy} { + test {EXPIRE - write on expire should work} { r del x r lpush x foo r expire x 1000 r lpush x bar r lrange x 0 -1 - } {bar} + } {bar foo} test {EXPIREAT - Check for EXPIRE alike behavior} { r del x From 6146329f1f3381e8daef47463a6588b161f10596 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 3 Aug 2010 13:38:39 +0200 Subject: [PATCH 017/139] replication test with expires --- tests/integration/replication.tcl | 18 ++++++++++++++++++ tests/support/util.tcl | 9 ++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 4b258825..6ca5a6dd 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -23,6 +23,24 @@ start_server {tags {"repl"}} { } assert_equal [r debug digest] [r -1 debug digest] } + + test {MASTER and SLAVE consistency with expire} { + createComplexDataset r 50000 useexpire + after 4000 ;# Make sure everything expired before taking the digest + if {[r debug digest] ne [r -1 debug digest]} { + set csv1 [csvdump r] + set csv2 [csvdump {r -1}] + set fd [open /tmp/repldump1.txt w] + puts -nonewline $fd $csv1 + close $fd + set fd [open /tmp/repldump2.txt w] + puts -nonewline $fd $csv2 + close $fd + puts "Master - Slave inconsistency" + puts "Run diff -u against /tmp/repldump*.txt for more info" + } + assert_equal [r debug digest] [r -1 debug digest] + } } } diff --git a/tests/support/util.tcl b/tests/support/util.tcl index b9c89aa8..95153111 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -140,12 +140,19 @@ proc findKeyWithType {r type} { return {} } -proc createComplexDataset {r ops} { +proc createComplexDataset {r ops {opt {}}} { for {set j 0} {$j < $ops} {incr j} { set k [randomKey] set k2 [randomKey] set f [randomValue] 
set v [randomValue] + + if {[lsearch -exact $opt useexpire] != -1} { + if {rand() < 0.1} { + {*}$r expire [randomKey] [randomInt 2] + } + } + randpath { set d [expr {rand()}] } { From a539d29ac559ffb80bfe6b3f045eddbd772fa1ba Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 3 Aug 2010 14:19:20 +0200 Subject: [PATCH 018/139] PERSIST command implemented --- src/db.c | 20 +++++++++++++++----- src/redis.c | 1 + src/redis.h | 1 + 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/db.c b/src/db.c index 5acda3d5..81e41430 100644 --- a/src/db.c +++ b/src/db.c @@ -394,11 +394,7 @@ int removeExpire(redisDb *db, robj *key) { /* An expire may only be removed if there is a corresponding entry in the * main dict. Otherwise, the key will never be freed. */ redisAssert(dictFind(db->dict,key->ptr) != NULL); - if (dictDelete(db->expires,key->ptr) == DICT_OK) { - return 1; - } else { - return 0; - } + return dictDelete(db->expires,key->ptr) == DICT_OK; } void setExpire(redisDb *db, robj *key, time_t when) { @@ -528,3 +524,17 @@ void ttlCommand(redisClient *c) { } addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl)); } + +void persistCommand(redisClient *c) { + dictEntry *de; + + de = dictFind(c->db->dict,c->argv[1]->ptr); + if (de == NULL) { + addReply(c,shared.czero); + } else { + if (removeExpire(c->db,c->argv[1])) + addReply(c,shared.cone); + else + addReply(c,shared.czero); + } +} diff --git a/src/redis.c b/src/redis.c index 27ade8b1..1a581a92 100644 --- a/src/redis.c +++ b/src/redis.c @@ -170,6 +170,7 @@ struct redisCommand readonlyCommandTable[] = { {"info",infoCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}, {"monitor",monitorCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}, {"ttl",ttlCommand,2,REDIS_CMD_INLINE,NULL,1,1,1}, + {"persist",persistCommand,2,REDIS_CMD_INLINE,NULL,1,1,1}, {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE,NULL,0,0,0}, {"debug",debugCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0}, {"config",configCommand,-2,REDIS_CMD_BULK,NULL,0,0,0}, diff --git a/src/redis.h 
b/src/redis.h index c211cfb5..781fb209 100644 --- a/src/redis.h +++ b/src/redis.h @@ -838,6 +838,7 @@ void expireCommand(redisClient *c); void expireatCommand(redisClient *c); void getsetCommand(redisClient *c); void ttlCommand(redisClient *c); +void persistCommand(redisClient *c); void slaveofCommand(redisClient *c); void debugCommand(redisClient *c); void msetCommand(redisClient *c); From 1fb4e8def723ac836ba96e5369f22a0bf463578d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 3 Aug 2010 14:25:22 +0200 Subject: [PATCH 019/139] PERSIST: a fix and some basic test --- src/db.c | 6 ++++-- tests/unit/expire.tcl | 11 +++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/db.c b/src/db.c index 81e41430..0dec95b1 100644 --- a/src/db.c +++ b/src/db.c @@ -532,9 +532,11 @@ void persistCommand(redisClient *c) { if (de == NULL) { addReply(c,shared.czero); } else { - if (removeExpire(c->db,c->argv[1])) + if (removeExpire(c->db,c->argv[1])) { addReply(c,shared.cone); - else + server.dirty++; + } else { addReply(c,shared.czero); + } } } diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index 5de907ab..6f16ed58 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -60,4 +60,15 @@ start_server {tags {"expire"}} { catch {r setex z -10 foo} e set _ $e } {*invalid expire*} + + test {PERSIST can undo an EXPIRE} { + r set x foo + r expire x 50 + list [r ttl x] [r persist x] [r ttl x] [r get x] + } {50 1 -1 foo} + + test {PERSIST returns 0 against non existing or non volatile keys} { + r set x foo + list [r persist foo] [r persist nokeyatall] + } {0 0} } From 2159782b513da6eaba9be210c6b8b237baab6cfe Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 3 Aug 2010 19:21:16 +0200 Subject: [PATCH 020/139] Use flexible array in zskiplistNode to reduce memory usage --- src/redis.h | 11 +++-- src/t_zset.c | 136 ++++++++++++++++++++++----------------------------- 2 files changed, 64 insertions(+), 83 deletions(-) diff --git a/src/redis.h 
b/src/redis.h index fb051f8e..bf694bdd 100644 --- a/src/redis.h +++ b/src/redis.h @@ -480,13 +480,14 @@ typedef struct _redisSortOperation { } redisSortOperation; /* ZSETs use a specialized version of Skiplists */ - typedef struct zskiplistNode { - struct zskiplistNode **forward; - struct zskiplistNode *backward; - unsigned int *span; - double score; robj *obj; + double score; + struct zskiplistNode *backward; + struct zskiplistLevel { + struct zskiplistNode *forward; + unsigned int span; + } level[]; } zskiplistNode; typedef struct zskiplist { diff --git a/src/t_zset.c b/src/t_zset.c index e93e5c40..50348638 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -24,13 +24,7 @@ * from tail to head, useful for ZREVRANGE. */ zskiplistNode *zslCreateNode(int level, double score, robj *obj) { - zskiplistNode *zn = zmalloc(sizeof(*zn)); - - zn->forward = zmalloc(sizeof(zskiplistNode*) * level); - if (level > 1) - zn->span = zmalloc(sizeof(unsigned int) * (level - 1)); - else - zn->span = NULL; + zskiplistNode *zn = zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel)); zn->score = score; zn->obj = obj; return zn; @@ -45,11 +39,8 @@ zskiplist *zslCreate(void) { zsl->length = 0; zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL); for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) { - zsl->header->forward[j] = NULL; - - /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */ - if (j < ZSKIPLIST_MAXLEVEL-1) - zsl->header->span[j] = 0; + zsl->header->level[j].forward = NULL; + zsl->header->level[j].span = 0; } zsl->header->backward = NULL; zsl->tail = NULL; @@ -58,19 +49,15 @@ zskiplist *zslCreate(void) { void zslFreeNode(zskiplistNode *node) { decrRefCount(node->obj); - zfree(node->forward); - zfree(node->span); zfree(node); } void zslFree(zskiplist *zsl) { - zskiplistNode *node = zsl->header->forward[0], *next; + zskiplistNode *node = zsl->header->level[0].forward, *next; - zfree(zsl->header->forward); - zfree(zsl->header->span); zfree(zsl->header); while(node) { - next = 
node->forward[0]; + next = node->level[0].forward; zslFreeNode(node); node = next; } @@ -93,13 +80,12 @@ void zslInsert(zskiplist *zsl, double score, robj *obj) { for (i = zsl->level-1; i >= 0; i--) { /* store rank that is crossed to reach the insert position */ rank[i] = i == (zsl->level-1) ? 0 : rank[i+1]; - - while (x->forward[i] && - (x->forward[i]->score < score || - (x->forward[i]->score == score && - compareStringObjects(x->forward[i]->obj,obj) < 0))) { - rank[i] += i > 0 ? x->span[i-1] : 1; - x = x->forward[i]; + while (x->level[i].forward && + (x->level[i].forward->score < score || + (x->level[i].forward->score == score && + compareStringObjects(x->level[i].forward->obj,obj) < 0))) { + rank[i] += x->level[i].span; + x = x->level[i].forward; } update[i] = x; } @@ -112,30 +98,28 @@ void zslInsert(zskiplist *zsl, double score, robj *obj) { for (i = zsl->level; i < level; i++) { rank[i] = 0; update[i] = zsl->header; - update[i]->span[i-1] = zsl->length; + update[i]->level[i].span = zsl->length; } zsl->level = level; } x = zslCreateNode(level,score,obj); for (i = 0; i < level; i++) { - x->forward[i] = update[i]->forward[i]; - update[i]->forward[i] = x; + x->level[i].forward = update[i]->level[i].forward; + update[i]->level[i].forward = x; /* update span covered by update[i] as x is inserted here */ - if (i > 0) { - x->span[i-1] = update[i]->span[i-1] - (rank[0] - rank[i]); - update[i]->span[i-1] = (rank[0] - rank[i]) + 1; - } + x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]); + update[i]->level[i].span = (rank[0] - rank[i]) + 1; } /* increment span for untouched levels */ for (i = level; i < zsl->level; i++) { - update[i]->span[i-1]++; + update[i]->level[i].span++; } x->backward = (update[0] == zsl->header) ? 
NULL : update[0]; - if (x->forward[0]) - x->forward[0]->backward = x; + if (x->level[0].forward) + x->level[0].forward->backward = x; else zsl->tail = x; zsl->length++; @@ -145,23 +129,19 @@ void zslInsert(zskiplist *zsl, double score, robj *obj) { void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) { int i; for (i = 0; i < zsl->level; i++) { - if (update[i]->forward[i] == x) { - if (i > 0) { - update[i]->span[i-1] += x->span[i-1] - 1; - } - update[i]->forward[i] = x->forward[i]; + if (update[i]->level[i].forward == x) { + update[i]->level[i].span += x->level[i].span - 1; + update[i]->level[i].forward = x->level[i].forward; } else { - /* invariant: i > 0, because update[0]->forward[0] - * is always equal to x */ - update[i]->span[i-1] -= 1; + update[i]->level[i].span -= 1; } } - if (x->forward[0]) { - x->forward[0]->backward = x->backward; + if (x->level[0].forward) { + x->level[0].forward->backward = x->backward; } else { zsl->tail = x->backward; } - while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL) + while(zsl->level > 1 && zsl->header->level[zsl->level-1].forward == NULL) zsl->level--; zsl->length--; } @@ -173,16 +153,16 @@ int zslDelete(zskiplist *zsl, double score, robj *obj) { x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { - while (x->forward[i] && - (x->forward[i]->score < score || - (x->forward[i]->score == score && - compareStringObjects(x->forward[i]->obj,obj) < 0))) - x = x->forward[i]; + while (x->level[i].forward && + (x->level[i].forward->score < score || + (x->level[i].forward->score == score && + compareStringObjects(x->level[i].forward->obj,obj) < 0))) + x = x->level[i].forward; update[i] = x; } /* We may have multiple elements with the same score, what we need * is to find the element with both the right score and object. 
*/ - x = x->forward[0]; + x = x->level[0].forward; if (x && score == x->score && equalStringObjects(x->obj,obj)) { zslDeleteNode(zsl, x, update); zslFreeNode(x); @@ -204,15 +184,15 @@ unsigned long zslDeleteRangeByScore(zskiplist *zsl, double min, double max, dict x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { - while (x->forward[i] && x->forward[i]->score < min) - x = x->forward[i]; + while (x->level[i].forward && x->level[i].forward->score < min) + x = x->level[i].forward; update[i] = x; } /* We may have multiple elements with the same score, what we need * is to find the element with both the right score and object. */ - x = x->forward[0]; + x = x->level[0].forward; while (x && x->score <= max) { - zskiplistNode *next = x->forward[0]; + zskiplistNode *next = x->level[0].forward; zslDeleteNode(zsl, x, update); dictDelete(dict,x->obj); zslFreeNode(x); @@ -231,17 +211,17 @@ unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { - while (x->forward[i] && (traversed + (i > 0 ? x->span[i-1] : 1)) < start) { - traversed += i > 0 ? 
x->span[i-1] : 1; - x = x->forward[i]; + while (x->level[i].forward && (traversed + x->level[i].span) < start) { + traversed += x->level[i].span; + x = x->level[i].forward; } update[i] = x; } traversed++; - x = x->forward[0]; + x = x->level[0].forward; while (x && traversed <= end) { - zskiplistNode *next = x->forward[0]; + zskiplistNode *next = x->level[0].forward; zslDeleteNode(zsl, x, update); dictDelete(dict,x->obj); zslFreeNode(x); @@ -260,12 +240,12 @@ zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) { x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { - while (x->forward[i] && x->forward[i]->score < score) - x = x->forward[i]; + while (x->level[i].forward && x->level[i].forward->score < score) + x = x->level[i].forward; } /* We may have multiple elements with the same score, what we need * is to find the element with both the right score and object. */ - return x->forward[0]; + return x->level[0].forward; } /* Find the rank for an element by both score and key. @@ -279,12 +259,12 @@ unsigned long zslistTypeGetRank(zskiplist *zsl, double score, robj *o) { x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { - while (x->forward[i] && - (x->forward[i]->score < score || - (x->forward[i]->score == score && - compareStringObjects(x->forward[i]->obj,o) <= 0))) { - rank += i > 0 ? x->span[i-1] : 1; - x = x->forward[i]; + while (x->level[i].forward && + (x->level[i].forward->score < score || + (x->level[i].forward->score == score && + compareStringObjects(x->level[i].forward->obj,o) <= 0))) { + rank += x->level[i].span; + x = x->level[i].forward; } /* x might be equal to zsl->header, so test if obj is non-NULL */ @@ -303,10 +283,10 @@ zskiplistNode* zslistTypeGetElementByRank(zskiplist *zsl, unsigned long rank) { x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { - while (x->forward[i] && (traversed + (i>0 ? x->span[i-1] : 1)) <= rank) + while (x->level[i].forward && (traversed + x->level[i].span) <= rank) { - traversed += i > 0 ? 
x->span[i-1] : 1; - x = x->forward[i]; + traversed += x->level[i].span; + x = x->level[i].forward; } if (traversed == rank) { return x; @@ -778,7 +758,7 @@ void zrangeGenericCommand(redisClient *c, int reverse) { ln = start == 0 ? zsl->tail : zslistTypeGetElementByRank(zsl, llen-start); } else { ln = start == 0 ? - zsl->header->forward[0] : zslistTypeGetElementByRank(zsl, start+1); + zsl->header->level[0].forward : zslistTypeGetElementByRank(zsl, start+1); } /* Return the result in form of a multi-bulk reply */ @@ -789,7 +769,7 @@ void zrangeGenericCommand(redisClient *c, int reverse) { addReplyBulk(c,ele); if (withscores) addReplyDouble(c,ln->score); - ln = reverse ? ln->backward : ln->forward[0]; + ln = reverse ? ln->backward : ln->level[0].forward; } } @@ -872,7 +852,7 @@ void genericZrangebyscoreCommand(redisClient *c, int justcount) { /* Get the first node with the score >= min, or with * score > min if 'minex' is true. */ ln = zslFirstWithScore(zsl,min); - while (minex && ln && ln->score == min) ln = ln->forward[0]; + while (minex && ln && ln->score == min) ln = ln->level[0].forward; if (ln == NULL) { /* No element matching the speciifed interval */ @@ -893,7 +873,7 @@ void genericZrangebyscoreCommand(redisClient *c, int justcount) { while(ln && (maxex ? (ln->score < max) : (ln->score <= max))) { if (offset) { offset--; - ln = ln->forward[0]; + ln = ln->level[0].forward; continue; } if (limit == 0) break; @@ -903,7 +883,7 @@ void genericZrangebyscoreCommand(redisClient *c, int justcount) { if (withscores) addReplyDouble(c,ln->score); } - ln = ln->forward[0]; + ln = ln->level[0].forward; rangelen++; if (limit > 0) limit--; } From 69ef89f2cf5a699d97475ff8e7c3ce714c6947cf Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 3 Aug 2010 20:49:53 +0200 Subject: [PATCH 021/139] Reference zset score in zskiplistNode from dict entries This avoids the extra allocation of sizeof(double) for storing the score of a zset entry in the hash table. 
Saves sizeof(double) + malloc overhead = approx. 16 bytes per zset entry. --- src/redis.c | 2 +- src/redis.h | 2 +- src/t_zset.c | 115 ++++++++++++++++++++++++++------------------------- 3 files changed, 60 insertions(+), 59 deletions(-) diff --git a/src/redis.c b/src/redis.c index c8b1c781..e6a1a137 100644 --- a/src/redis.c +++ b/src/redis.c @@ -338,7 +338,7 @@ dictType zsetDictType = { NULL, /* val dup */ dictEncObjKeyCompare, /* key compare */ dictRedisObjectDestructor, /* key destructor */ - dictVanillaFree /* val destructor of malloc(sizeof(double)) */ + NULL /* val destructor */ }; /* Db->dict, keys are sds strings, vals are Redis objects. */ diff --git a/src/redis.h b/src/redis.h index bf694bdd..4b45f5f4 100644 --- a/src/redis.h +++ b/src/redis.h @@ -675,7 +675,7 @@ void backgroundRewriteDoneHandler(int statloc); /* Sorted sets data type */ zskiplist *zslCreate(void); void zslFree(zskiplist *zsl); -void zslInsert(zskiplist *zsl, double score, robj *obj); +zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj); /* Core functions */ void freeMemoryIfNeeded(void); diff --git a/src/t_zset.c b/src/t_zset.c index 50348638..3d9f612a 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -71,7 +71,7 @@ int zslRandomLevel(void) { return (leveltail = x; zsl->length++; + return x; } /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ @@ -193,7 +194,7 @@ unsigned long zslDeleteRangeByScore(zskiplist *zsl, double min, double max, dict x = x->level[0].forward; while (x && x->score <= max) { zskiplistNode *next = x->level[0].forward; - zslDeleteNode(zsl, x, update); + zslDeleteNode(zsl,x,update); dictDelete(dict,x->obj); zslFreeNode(x); removed++; @@ -222,7 +223,7 @@ unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned x = x->level[0].forward; while (x && traversed <= end) { zskiplistNode *next = x->level[0].forward; - zslDeleteNode(zsl, x, update); + zslDeleteNode(zsl,x,update); dictDelete(dict,x->obj); 
zslFreeNode(x); removed++; @@ -299,13 +300,11 @@ zskiplistNode* zslistTypeGetElementByRank(zskiplist *zsl, unsigned long rank) { * Sorted set commands *----------------------------------------------------------------------------*/ -/* This generic command implements both ZADD and ZINCRBY. - * scoreval is the score if the operation is a ZADD (doincrement == 0) or - * the increment if the operation is a ZINCRBY (doincrement == 1). */ -void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) { +/* This generic command implements both ZADD and ZINCRBY. */ +void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double score, int incr) { robj *zsetobj; zset *zs; - double *score; + zskiplistNode *znode; zsetobj = lookupKeyWrite(c->db,key); if (zsetobj == NULL) { @@ -319,72 +318,73 @@ void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, i } zs = zsetobj->ptr; - /* Ok now since we implement both ZADD and ZINCRBY here the code - * needs to handle the two different conditions. It's all about setting - * '*score', that is, the new score to set, to the right value. */ - score = zmalloc(sizeof(double)); - if (doincrement) { - dictEntry *de; - + /* Since both ZADD and ZINCRBY are implemented here, we need to increment + * the score first by the current score if ZINCRBY is called. */ + if (incr) { /* Read the old score. 
If the element was not present starts from 0 */ - de = dictFind(zs->dict,ele); - if (de) { - double *oldscore = dictGetEntryVal(de); - *score = *oldscore + scoreval; - } else { - *score = scoreval; - } - if (isnan(*score)) { + dictEntry *de = dictFind(zs->dict,ele); + if (de != NULL) + score += *(double*)dictGetEntryVal(de); + + if (isnan(score)) { addReplySds(c, sdsnew("-ERR resulting score is not a number (NaN)\r\n")); - zfree(score); /* Note that we don't need to check if the zset may be empty and * should be removed here, as we can only obtain Nan as score if * there was already an element in the sorted set. */ return; } - } else { - *score = scoreval; } - /* What follows is a simple remove and re-insert operation that is common - * to both ZADD and ZINCRBY... */ - if (dictAdd(zs->dict,ele,score) == DICT_OK) { - /* case 1: New element */ + /* We need to remove and re-insert the element when it was already present + * in the dictionary, to update the skiplist. Note that we delay adding a + * pointer to the score because we want to reference the score in the + * skiplist node. 
*/ + if (dictAdd(zs->dict,ele,NULL) == DICT_OK) { + dictEntry *de; + + /* New element */ incrRefCount(ele); /* added to hash */ - zslInsert(zs->zsl,*score,ele); + znode = zslInsert(zs->zsl,score,ele); incrRefCount(ele); /* added to skiplist */ + + /* Update the score in the dict entry */ + de = dictFind(zs->dict,ele); + redisAssert(de != NULL); + dictGetEntryVal(de) = &znode->score; touchWatchedKey(c->db,c->argv[1]); server.dirty++; - if (doincrement) - addReplyDouble(c,*score); + if (incr) + addReplyDouble(c,score); else addReply(c,shared.cone); } else { dictEntry *de; - double *oldscore; + robj *curobj; + double *curscore; + int deleted; - /* case 2: Score update operation */ + /* Update score */ de = dictFind(zs->dict,ele); redisAssert(de != NULL); - oldscore = dictGetEntryVal(de); - if (*score != *oldscore) { - int deleted; + curobj = dictGetEntryKey(de); + curscore = dictGetEntryVal(de); - /* Remove and insert the element in the skip list with new score */ - deleted = zslDelete(zs->zsl,*oldscore,ele); + /* When the score is updated, reuse the existing string object to + * prevent extra alloc/dealloc of strings on ZINCRBY. 
*/ + if (score != *curscore) { + deleted = zslDelete(zs->zsl,*curscore,curobj); redisAssert(deleted != 0); - zslInsert(zs->zsl,*score,ele); - incrRefCount(ele); - /* Update the score in the hash table */ - dictReplace(zs->dict,ele,score); + znode = zslInsert(zs->zsl,score,curobj); + incrRefCount(curobj); + + /* Update the score in the current dict entry */ + dictGetEntryVal(de) = &znode->score; touchWatchedKey(c->db,c->argv[1]); server.dirty++; - } else { - zfree(score); } - if (doincrement) - addReplyDouble(c,*score); + if (incr) + addReplyDouble(c,score); else addReply(c,shared.czero); } @@ -406,7 +406,7 @@ void zremCommand(redisClient *c) { robj *zsetobj; zset *zs; dictEntry *de; - double *oldscore; + double curscore; int deleted; if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL || @@ -419,8 +419,8 @@ void zremCommand(redisClient *c) { return; } /* Delete from the skiplist */ - oldscore = dictGetEntryVal(de); - deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]); + curscore = *(double*)dictGetEntryVal(de); + deleted = zslDelete(zs->zsl,curscore,c->argv[2]); redisAssert(deleted != 0); /* Delete from the hash table */ @@ -534,6 +534,7 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { zsetopsrc *src; robj *dstobj; zset *dstzset; + zskiplistNode *znode; dictIterator *di; dictEntry *de; int touched = 0; @@ -642,10 +643,10 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { zfree(score); } else { robj *o = dictGetEntryKey(de); - dictAdd(dstzset->dict,o,score); - incrRefCount(o); /* added to dictionary */ - zslInsert(dstzset->zsl,*score,o); + znode = zslInsert(dstzset->zsl,*score,o); incrRefCount(o); /* added to skiplist */ + dictAdd(dstzset->dict,o,&znode->score); + incrRefCount(o); /* added to dictionary */ } } dictReleaseIterator(di); @@ -673,10 +674,10 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { } robj *o = dictGetEntryKey(de); - dictAdd(dstzset->dict,o,score); - 
incrRefCount(o); /* added to dictionary */ - zslInsert(dstzset->zsl,*score,o); + znode = zslInsert(dstzset->zsl,*score,o); incrRefCount(o); /* added to skiplist */ + dictAdd(dstzset->dict,o,&znode->score); + incrRefCount(o); /* added to dictionary */ } dictReleaseIterator(di); } From f2dd4769dd1dad4e85fffe4e560b9b355d59e703 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 4 Aug 2010 14:15:52 +0200 Subject: [PATCH 022/139] Tests for the interactive mode of redis-cli Changed redis-cli to output the raw response for a bulk reply when it is run in interactive mode instead of checking isatty. --- src/redis-cli.c | 9 +++-- tests/integration/redis-cli.tcl | 69 +++++++++++++++++++++++++++++++++ tests/test_helper.tcl | 10 ++++- 3 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 tests/integration/redis-cli.tcl diff --git a/src/redis-cli.c b/src/redis-cli.c index dac82862..97a119c8 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -59,7 +59,7 @@ static struct config { int shutdown; int monitor_mode; int pubsub_mode; - int raw_output; + int raw_output; /* output mode per command */ char *auth; char *historyfile; } config; @@ -152,7 +152,7 @@ static int cliReadBulkReply(int fd) { reply = zmalloc(bulklen); anetRead(fd,reply,bulklen); anetRead(fd,crlf,2); - if (config.raw_output || !isatty(fileno(stdout))) { + if (config.raw_output || !config.interactive) { if (bulklen && fwrite(reply,bulklen,1,stdout) == 0) { zfree(reply); return 1; @@ -494,7 +494,10 @@ int main(int argc, char **argv) { cliSendCommand(2, convertToSds(2, authargv), 1); } - if (argc == 0 || config.interactive == 1) repl(); + if (argc == 0 || config.interactive == 1) { + config.interactive = 1; + repl(); + } argvcopy = convertToSds(argc+1, argv); if (config.argn_from_stdin) { diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl new file mode 100644 index 00000000..191c6598 --- /dev/null +++ b/tests/integration/redis-cli.tcl @@ -0,0 +1,69 @@ +start_server 
{tags {"cli"}} { + proc open_cli {} { + set ::env(TERM) dumb + set fd [open [format "|src/redis-cli -p %d -n 9" [srv port]] "r+"] + fconfigure $fd -buffering none + fconfigure $fd -blocking false + fconfigure $fd -translation binary + assert_equal "redis> " [read_cli $fd] + set _ $fd + } + + proc close_cli {fd} { + close $fd + } + + proc read_cli {fd} { + set buf [read $fd] + while {[string length $buf] == 0} { + # wait some time and try again + after 10 + set buf [read $fd] + } + set _ $buf + } + + proc write_cli {fd buf} { + puts $fd $buf + flush $fd + } + + proc run_command {fd cmd} { + write_cli $fd $cmd + set lines [split [read_cli $fd] "\n"] + assert_equal "redis> " [lindex $lines end] + join [lrange $lines 0 end-1] "\n" + } + + proc test_interactive_cli {name code} { + set fd [open_cli] + test "Interactive CLI: $name" $code + close_cli $fd + } + + test_interactive_cli "INFO response should be printed raw" { + set lines [split [run_command $fd info] "\n"] + foreach line $lines { + assert [regexp {^[a-z0-9_]+:[a-z0-9_]+} $line] + } + } + + test_interactive_cli "Status reply" { + assert_equal "OK" [run_command $fd "set key foo"] + } + + test_interactive_cli "Integer reply" { + assert_equal "(integer) 1" [run_command $fd "incr counter"] + } + + test_interactive_cli "Bulk reply" { + r set key foo + assert_equal "\"foo\"" [run_command $fd "get key"] + } + + test_interactive_cli "Multi-bulk reply" { + r rpush list foo + r rpush list bar + assert_equal "1. \"foo\"\n2. \"bar\"" [run_command $fd "lrange list 0 -1"] + } +} diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index ef1f9923..4ae9cc65 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -25,7 +25,14 @@ proc execute_tests name { # are nested, use "srv 0 pid" to get the pid of the inner server. To access # outer servers, use "srv -1 pid" etcetera. 
set ::servers {} -proc srv {level property} { +proc srv {args} { + set level 0 + if {[string is integer [lindex $args 0]]} { + set level [lindex $args 0] + set property [lindex $args 1] + } else { + set property [lindex $args 0] + } set srv [lindex $::servers end+$level] dict get $srv $property } @@ -88,6 +95,7 @@ proc main {} { execute_tests "unit/cas" execute_tests "integration/replication" execute_tests "integration/aof" + execute_tests "integration/redis-cli" execute_tests "unit/pubsub" # run tests with VM enabled From 0439d792c46efa328d67e098d688435bca1e2700 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 4 Aug 2010 15:29:28 +0200 Subject: [PATCH 023/139] Add tests for quotation in an interactive redis-cli session Patched redis-cli to abort on unexpected quotation. This caused redis-cli to get into an infinite, memory-consuming loop. --- src/redis-cli.c | 21 ++++++++++++++++++--- tests/integration/redis-cli.tcl | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 97a119c8..87ebcb69 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -378,7 +378,7 @@ static char **splitArguments(char *line, int *argc) { if (*p) { /* get a token */ int inq=0; /* set to 1 if we are in "quotes" */ - int done = 0; + int done=0; if (current == NULL) current = sdsempty(); while(!done) { @@ -397,7 +397,12 @@ static char **splitArguments(char *line, int *argc) { } current = sdscatlen(current,&c,1); } else if (*p == '"') { - done = 1; + /* closing quote must be followed by a space */ + if (*(p+1) && !isspace(*(p+1))) goto err; + done=1; + } else if (!*p) { + /* unterminated quotes */ + goto err; } else { current = sdscatlen(current,p,1); } @@ -429,6 +434,13 @@ static char **splitArguments(char *line, int *argc) { return vector; } } + +err: + while(*argc--) + sdsfree(vector[*argc]); + zfree(vector); + if (current) sdsfree(current); + return NULL; } #define LINE_BUFLEN 4096 @@ -441,7 +453,10 @@ static 
void repl() { argv = splitArguments(line,&argc); linenoiseHistoryAdd(line); if (config.historyfile) linenoiseHistorySave(config.historyfile); - if (argc > 0) { + if (argv == NULL) { + printf("Invalid argument(s)\n"); + continue; + } else if (argc > 0) { if (strcasecmp(argv[0],"quit") == 0 || strcasecmp(argv[0],"exit") == 0) exit(0); diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl index 191c6598..6e106135 100644 --- a/tests/integration/redis-cli.tcl +++ b/tests/integration/redis-cli.tcl @@ -66,4 +66,23 @@ start_server {tags {"cli"}} { r rpush list bar assert_equal "1. \"foo\"\n2. \"bar\"" [run_command $fd "lrange list 0 -1"] } + + test_interactive_cli "Parsing quotes" { + assert_equal "OK" [run_command $fd "set key \"bar\""] + assert_equal "bar" [r get key] + assert_equal "OK" [run_command $fd "set key \" bar \""] + assert_equal " bar " [r get key] + assert_equal "OK" [run_command $fd "set key \"\\\"bar\\\"\""] + assert_equal "\"bar\"" [r get key] + assert_equal "OK" [run_command $fd "set key \"\tbar\t\""] + assert_equal "\tbar\t" [r get key] + + # invalid quotation + assert_equal "Invalid argument(s)" [run_command $fd "get \"\"key"] + assert_equal "Invalid argument(s)" [run_command $fd "get \"key\"x"] + + # quotes after the argument are weird, but should be allowed + assert_equal "OK" [run_command $fd "set key\"\" bar"] + assert_equal "bar" [r get key] + } } From 07242c0ccf1fd6a4d8199fa09981e6f7a3a1cce9 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 4 Aug 2010 17:02:13 +0200 Subject: [PATCH 024/139] Tests for redis-cli in non-interactive mode Minor change in redis-cli output for the (multi-)bulk response but this will be fixed in the next commit. 
--- src/redis-cli.c | 5 +++-- tests/integration/redis-cli.tcl | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 87ebcb69..c1cc17a3 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -134,7 +134,7 @@ static void printStringRepr(char *s, int len) { } s++; } - printf("\"\n"); + printf("\""); } static int cliReadBulkReply(int fd) { @@ -152,7 +152,7 @@ static int cliReadBulkReply(int fd) { reply = zmalloc(bulklen); anetRead(fd,reply,bulklen); anetRead(fd,crlf,2); - if (config.raw_output || !config.interactive) { + if (config.raw_output) { if (bulklen && fwrite(reply,bulklen,1,stdout) == 0) { zfree(reply); return 1; @@ -161,6 +161,7 @@ static int cliReadBulkReply(int fd) { /* If you are producing output for the standard output we want * a more interesting output with quoted characters and so forth */ printStringRepr(reply,bulklen); + printf("\n"); } zfree(reply); return 0; diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl index 6e106135..a10968b1 100644 --- a/tests/integration/redis-cli.tcl +++ b/tests/integration/redis-cli.tcl @@ -41,6 +41,19 @@ start_server {tags {"cli"}} { close_cli $fd } + proc run_cli {args} { + set fd [open [format "|src/redis-cli -p %d -n 9 $args" [srv port]] "r"] + fconfigure $fd -buffering none + fconfigure $fd -translation binary + set resp [read $fd 1048576] + close $fd + set _ $resp + } + + proc test_noninteractive_cli {name code} { + test "Non-interactive CLI: $name" $code + } + test_interactive_cli "INFO response should be printed raw" { set lines [split [run_command $fd info] "\n"] foreach line $lines { @@ -85,4 +98,26 @@ start_server {tags {"cli"}} { assert_equal "OK" [run_command $fd "set key\"\" bar"] assert_equal "bar" [r get key] } + + test_noninteractive_cli "Status reply" { + assert_equal "OK\n" [run_cli set key bar] + assert_equal "bar" [r get key] + } + + test_noninteractive_cli "Integer reply" { + r del 
counter + assert_equal "(integer) 1\n" [run_cli incr counter] + } + + test_noninteractive_cli "Bulk reply" { + r set key "tab\tnewline\n" + assert_equal "\"tab\\tnewline\\n\"\n" [run_cli get key] + } + + test_noninteractive_cli "Multi-bulk reply" { + r del list + r rpush list foo + r rpush list bar + assert_equal "1. \"foo\"\n2. \"bar\"\n" [run_cli lrange list 0 -1] + } } From 123a10f7a5612c7174adeb05d238d88b98fb906d Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 4 Aug 2010 17:16:05 +0200 Subject: [PATCH 025/139] Let the output mode depend on having a tty or not --- src/redis-cli.c | 5 ++++- tests/integration/redis-cli.tcl | 33 ++++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c1cc17a3..6878d283 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -60,6 +60,7 @@ static struct config { int monitor_mode; int pubsub_mode; int raw_output; /* output mode per command */ + int tty; /* flag for default output format */ char *auth; char *historyfile; } config; @@ -152,7 +153,7 @@ static int cliReadBulkReply(int fd) { reply = zmalloc(bulklen); anetRead(fd,reply,bulklen); anetRead(fd,crlf,2); - if (config.raw_output) { + if (config.raw_output || !config.tty) { if (bulklen && fwrite(reply,bulklen,1,stdout) == 0) { zfree(reply); return 1; @@ -491,6 +492,7 @@ int main(int argc, char **argv) { config.raw_output = 0; config.auth = NULL; config.historyfile = NULL; + config.tty = 1; if (getenv("HOME") != NULL) { config.historyfile = malloc(256); @@ -515,6 +517,7 @@ int main(int argc, char **argv) { repl(); } + config.tty = isatty(stdout) || (getenv("FAKETTY") != NULL); argvcopy = convertToSds(argc+1, argv); if (config.argn_from_stdin) { sds lastarg = readArgFromStdin(); diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl index a10968b1..b9c4f8e1 100644 --- a/tests/integration/redis-cli.tcl +++ b/tests/integration/redis-cli.tcl @@ -41,7 +41,7 @@ start_server 
{tags {"cli"}} { close_cli $fd } - proc run_cli {args} { + proc run_nontty_cli {args} { set fd [open [format "|src/redis-cli -p %d -n 9 $args" [srv port]] "r"] fconfigure $fd -buffering none fconfigure $fd -translation binary @@ -50,8 +50,19 @@ start_server {tags {"cli"}} { set _ $resp } - proc test_noninteractive_cli {name code} { - test "Non-interactive CLI: $name" $code + proc test_nontty_cli {name code} { + test "Non-interactive non-TTY CLI: $name" $code + } + + proc run_tty_cli {args} { + set ::env(FAKETTY) 1 + set resp [run_nontty_cli {*}$args] + unset ::env(FAKETTY) + set _ $resp + } + + proc test_tty_cli {name code} { + test "Non-interactive TTY CLI: $name" $code } test_interactive_cli "INFO response should be printed raw" { @@ -99,25 +110,25 @@ start_server {tags {"cli"}} { assert_equal "bar" [r get key] } - test_noninteractive_cli "Status reply" { - assert_equal "OK\n" [run_cli set key bar] + test_tty_cli "Status reply" { + assert_equal "OK\n" [run_tty_cli set key bar] assert_equal "bar" [r get key] } - test_noninteractive_cli "Integer reply" { + test_tty_cli "Integer reply" { r del counter - assert_equal "(integer) 1\n" [run_cli incr counter] + assert_equal "(integer) 1\n" [run_tty_cli incr counter] } - test_noninteractive_cli "Bulk reply" { + test_tty_cli "Bulk reply" { r set key "tab\tnewline\n" - assert_equal "\"tab\\tnewline\\n\"\n" [run_cli get key] + assert_equal "\"tab\\tnewline\\n\"\n" [run_tty_cli get key] } - test_noninteractive_cli "Multi-bulk reply" { + test_tty_cli "Multi-bulk reply" { r del list r rpush list foo r rpush list bar - assert_equal "1. \"foo\"\n2. \"bar\"\n" [run_cli lrange list 0 -1] + assert_equal "1. \"foo\"\n2. 
\"bar\"\n" [run_tty_cli lrange list 0 -1] } } From 3a51bff0358c38162bc925ab25661e6090cf1161 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 4 Aug 2010 17:46:56 +0200 Subject: [PATCH 026/139] Change output format for non-tty redis-cli execution --- src/redis-cli.c | 19 ++++++++++++------- tests/integration/redis-cli.tcl | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 6878d283..fc2238d4 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -61,6 +61,7 @@ static struct config { int pubsub_mode; int raw_output; /* output mode per command */ int tty; /* flag for default output format */ + char mb_sep; char *auth; char *historyfile; } config; @@ -108,7 +109,7 @@ static int cliReadSingleLineReply(int fd, int quiet) { if (reply == NULL) return 1; if (!quiet) - printf("%s\n", reply); + printf("%s", reply); sdsfree(reply); return 0; } @@ -162,7 +163,6 @@ static int cliReadBulkReply(int fd) { /* If you are producing output for the standard output we want * a more interesting output with quoted characters and so forth */ printStringRepr(reply,bulklen); - printf("\n"); } zfree(reply); return 0; @@ -183,8 +183,9 @@ static int cliReadMultiBulkReply(int fd) { printf("(empty list or set)\n"); } while(elements--) { - printf("%d. ", c); + if (config.tty) printf("%d. ", c); if (cliReadReply(fd)) return 1; + if (elements) printf("%c",config.mb_sep); c++; } return 0; @@ -199,13 +200,13 @@ static int cliReadReply(int fd) { } switch(type) { case '-': - printf("(error) "); + if (config.tty) printf("(error) "); cliReadSingleLineReply(fd,0); return 1; case '+': return cliReadSingleLineReply(fd,0); case ':': - printf("(integer) "); + if (config.tty) printf("(integer) "); return cliReadSingleLineReply(fd,0); case '$': return cliReadBulkReply(fd); @@ -275,7 +276,7 @@ static int cliSendCommand(int argc, char **argv, int repeat) { printf("Reading messages... 
(press Ctrl-c to quit)\n"); while (1) { cliReadReply(fd); - printf("\n"); + printf("\n\n"); } } @@ -283,6 +284,9 @@ static int cliSendCommand(int argc, char **argv, int repeat) { if (retval) { return retval; } + if (!config.raw_output && config.tty) { + printf("\n"); + } } return 0; } @@ -493,6 +497,7 @@ int main(int argc, char **argv) { config.auth = NULL; config.historyfile = NULL; config.tty = 1; + config.mb_sep = '\n'; if (getenv("HOME") != NULL) { config.historyfile = malloc(256); @@ -517,7 +522,7 @@ int main(int argc, char **argv) { repl(); } - config.tty = isatty(stdout) || (getenv("FAKETTY") != NULL); + config.tty = isatty(fileno(stdout)) || (getenv("FAKETTY") != NULL); argvcopy = convertToSds(argc+1, argv); if (config.argn_from_stdin) { sds lastarg = readArgFromStdin(); diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl index b9c4f8e1..c4954304 100644 --- a/tests/integration/redis-cli.tcl +++ b/tests/integration/redis-cli.tcl @@ -131,4 +131,26 @@ start_server {tags {"cli"}} { r rpush list bar assert_equal "1. \"foo\"\n2. \"bar\"\n" [run_tty_cli lrange list 0 -1] } + + test_nontty_cli "Status reply" { + assert_equal "OK" [run_nontty_cli set key bar] + assert_equal "bar" [r get key] + } + + test_nontty_cli "Integer reply" { + r del counter + assert_equal "1" [run_nontty_cli incr counter] + } + + test_nontty_cli "Bulk reply" { + r set key "tab\tnewline\n" + assert_equal "tab\tnewline\n" [run_nontty_cli get key] + } + + test_nontty_cli "Multi-bulk reply" { + r del list + r rpush list foo + r rpush list bar + assert_equal "foo\nbar" [run_nontty_cli lrange list 0 -1] + } } From cf0c6b78f132847a576ccd275cb0fffbf0a061ff Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 4 Aug 2010 18:16:39 +0200 Subject: [PATCH 027/139] Set tty before going into interactive mode to get non-pretty output when the commands are read from stdin. 
--- src/redis-cli.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index fc2238d4..0fef8cd9 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -496,7 +496,7 @@ int main(int argc, char **argv) { config.raw_output = 0; config.auth = NULL; config.historyfile = NULL; - config.tty = 1; + config.tty = isatty(fileno(stdout)) || (getenv("FAKETTY") != NULL); config.mb_sep = '\n'; if (getenv("HOME") != NULL) { @@ -522,7 +522,6 @@ int main(int argc, char **argv) { repl(); } - config.tty = isatty(fileno(stdout)) || (getenv("FAKETTY") != NULL); argvcopy = convertToSds(argc+1, argv); if (config.argn_from_stdin) { sds lastarg = readArgFromStdin(); From abb731e5b8b2c305c2f4d204d1cca12bdd6fda8c Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 4 Aug 2010 18:36:03 +0200 Subject: [PATCH 028/139] Deprecate starting interactive mode using the -i flag --- src/redis-cli.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 0fef8cd9..43cbc55e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -55,7 +55,6 @@ static struct config { long repeat; int dbnum; int argn_from_stdin; - int interactive; int shutdown; int monitor_mode; int pubsub_mode; @@ -320,7 +319,10 @@ static int parseOptions(int argc, char **argv) { config.auth = argv[i+1]; i++; } else if (!strcmp(argv[i],"-i")) { - config.interactive = 1; + fprintf(stderr, +"Starting interactive mode using -i is deprecated. 
Interactive mode is started\n" +"by default when redis-cli is executed without a command to execute.\n" + ); } else if (!strcmp(argv[i],"-c")) { config.argn_from_stdin = 1; } else if (!strcmp(argv[i],"-v")) { @@ -490,7 +492,6 @@ int main(int argc, char **argv) { config.dbnum = 0; config.argn_from_stdin = 0; config.shutdown = 0; - config.interactive = 0; config.monitor_mode = 0; config.pubsub_mode = 0; config.raw_output = 0; @@ -517,10 +518,8 @@ int main(int argc, char **argv) { cliSendCommand(2, convertToSds(2, authargv), 1); } - if (argc == 0 || config.interactive == 1) { - config.interactive = 1; - repl(); - } + /* Start interactive mode when no command is provided */ + if (argc == 0) repl(); argvcopy = convertToSds(argc+1, argv); if (config.argn_from_stdin) { From cbce5171451eb53f1370aacc30decd74512347ac Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 5 Aug 2010 11:36:39 +0200 Subject: [PATCH 029/139] redis cli argument splitting is general and is now moved into the sds.c lib --- src/redis-cli.c | 70 ++------------------------------------------ src/sds.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ src/sds.h | 1 + 3 files changed, 81 insertions(+), 67 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index dac82862..b4a10890 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -366,79 +366,15 @@ static char **convertToSds(int count, char** args) { return sds; } -static char **splitArguments(char *line, int *argc) { - char *p = line; - char *current = NULL; - char **vector = NULL; - - *argc = 0; - while(1) { - /* skip blanks */ - while(*p && isspace(*p)) p++; - if (*p) { - /* get a token */ - int inq=0; /* set to 1 if we are in "quotes" */ - int done = 0; - - if (current == NULL) current = sdsempty(); - while(!done) { - if (inq) { - if (*p == '\\' && *(p+1)) { - char c; - - p++; - switch(*p) { - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - case 'b': c = '\b'; break; - case 'a': c = '\a'; break; - 
default: c = *p; break; - } - current = sdscatlen(current,&c,1); - } else if (*p == '"') { - done = 1; - } else { - current = sdscatlen(current,p,1); - } - } else { - switch(*p) { - case ' ': - case '\n': - case '\r': - case '\t': - case '\0': - done=1; - break; - case '"': - inq=1; - break; - default: - current = sdscatlen(current,p,1); - break; - } - } - if (*p) p++; - } - /* add the token to the vector */ - vector = zrealloc(vector,((*argc)+1)*sizeof(char*)); - vector[*argc] = current; - (*argc)++; - current = NULL; - } else { - return vector; - } - } -} - #define LINE_BUFLEN 4096 static void repl() { int argc, j; - char *line, **argv; + char *line; + sds *argv; while((line = linenoise("redis> ")) != NULL) { if (line[0] != '\0') { - argv = splitArguments(line,&argc); + argv = sdssplitargs(line,&argc); linenoiseHistoryAdd(line); if (config.historyfile) linenoiseHistorySave(config.historyfile); if (argc > 0) { diff --git a/src/sds.c b/src/sds.c index 5e67f044..4878f8a6 100644 --- a/src/sds.c +++ b/src/sds.c @@ -382,3 +382,80 @@ sds sdscatrepr(sds s, char *p, size_t len) { } return sdscatlen(s,"\"",1); } + +/* Split a line into arguments, where every argument can be in the + * following programming-language REPL-alike form: + * + * foo bar "newline are supported\n" and "\xff\x00otherstuff" + * + * The number of arguments is stored into *argc, and an array + * of sds is returned. The caller should sdsfree() all the returned + * strings and finally zfree() the array itself. + * + * Note that sdscatrepr() is able to convert back a string into + * a quoted string in the same format sdssplitargs() is able to parse. 
+ */ +sds *sdssplitargs(char *line, int *argc) { + char *p = line; + char *current = NULL; + char **vector = NULL; + + *argc = 0; + while(1) { + /* skip blanks */ + while(*p && isspace(*p)) p++; + if (*p) { + /* get a token */ + int inq=0; /* set to 1 if we are in "quotes" */ + int done = 0; + + if (current == NULL) current = sdsempty(); + while(!done) { + if (inq) { + if (*p == '\\' && *(p+1)) { + char c; + + p++; + switch(*p) { + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'b': c = '\b'; break; + case 'a': c = '\a'; break; + default: c = *p; break; + } + current = sdscatlen(current,&c,1); + } else if (*p == '"') { + done = 1; + } else { + current = sdscatlen(current,p,1); + } + } else { + switch(*p) { + case ' ': + case '\n': + case '\r': + case '\t': + case '\0': + done=1; + break; + case '"': + inq=1; + break; + default: + current = sdscatlen(current,p,1); + break; + } + } + if (*p) p++; + } + /* add the token to the vector */ + vector = zrealloc(vector,((*argc)+1)*sizeof(char*)); + vector[*argc] = current; + (*argc)++; + current = NULL; + } else { + return vector; + } + } +} diff --git a/src/sds.h b/src/sds.h index ef3a418f..a0e224f5 100644 --- a/src/sds.h +++ b/src/sds.h @@ -70,5 +70,6 @@ void sdstolower(sds s); void sdstoupper(sds s); sds sdsfromlonglong(long long value); sds sdscatrepr(sds s, char *p, size_t len); +sds *sdssplitargs(char *line, int *argc); #endif From c4705381422ead4ad99f4b7a3bc11f059c460401 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 13 Aug 2010 19:28:49 +0200 Subject: [PATCH 030/139] Make ziplist schema more efficient for strings with length > 15 --- src/ziplist.c | 227 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 144 insertions(+), 83 deletions(-) diff --git a/src/ziplist.c b/src/ziplist.c index 7a3a8b01..a6383517 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -1,17 +1,63 @@ -/* Memory layout of a ziplist, containing "foo", "bar", "quux": - * 
<zlbytes><zllen><len>"foo"<len>"bar"<len>"quux" +/* The ziplist is a specially encoded doubly linked list that is designed + * to be very memory efficient. It stores both strings and integer values, + * where integers are encoded as actual integers instead of a series of + * characters. It allows push and pop operations on either side of the list + * in O(1) time. However, because every operation requires a reallocation of + * the memory used by the ziplist, the actual complexity is related to the + * amount of memory used by the ziplist. * - * <zlbytes> is an unsigned integer to hold the number of bytes that - * the ziplist occupies. This is stored to not have to traverse the ziplist - * to know the new length when pushing. + * ---------------------------------------------------------------------------- * - * <zllen> is the number of items in the ziplist. When this value is - * greater than 254, we need to traverse the entire list to know - * how many items it holds. + * ZIPLIST OVERALL LAYOUT: + * The general layout of the ziplist is as follows: + * <zlbytes><zltail><zllen><entry><entry><zlend> * - * <len> is the number of bytes occupied by a single entry. When this - * number is greater than 253, the length will occupy 5 bytes, where - * the extra bytes contain an unsigned integer to hold the length. + * <zlbytes> is an unsigned integer to hold the number of bytes that the + * ziplist occupies. This value needs to be stored to be able to resize the + * entire structure without the need to traverse it first. + * + * <zltail> is the offset to the last entry in the list. This allows a pop + * operation on the far side of the list without the need for full traversal. + * + * <zllen> is the number of entries. When this value is larger than 2**16-2, + * we need to traverse the entire list to know how many items it holds. + * + * <zlend> is a single byte special value, equal to 255, which indicates the + * end of the list. 
+ * + * ZIPLIST ENTRIES: + * Every entry in the ziplist is prefixed by a header that contains two pieces + * of information. 
First, the length of the previous entry is stored to be + * able to traverse the list from back to front. Second, the encoding with an + * optional string length of the entry itself is stored. + * + * The length of the previous entry is encoded in the following way: + * If this length is smaller than 254 bytes, it will only consume a single + * byte that takes the length as value. When the length is greater than or + * equal to 254, it will consume 5 bytes. The first byte is set to 254 to + * indicate a larger value is following. The remaining 4 bytes take the + * length of the previous entry as value. + * + * The other header field of the entry itself depends on the contents of the + * entry. When the entry is a string, the first 2 bits of this header will hold + * the type of encoding used to store the length of the string, followed by the + * actual length of the string. When the entry is an integer the first 2 bits + * are both set to 1. The following 2 bits are used to specify what kind of + * integer will be stored after this header. An overview of the different + * types and encodings is as follows: + * + * |00pppppp| - 1 byte + * String value with length less than or equal to 63 bytes (6 bits). + * |01pppppp|qqqqqqqq| - 2 bytes + * String value with length less than or equal to 16383 bytes (14 bits). + * |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes + * String value with length greater than or equal to 16384 bytes. + * |1100____| - 1 byte + * Integer encoded as int16_t (2 bytes). + * |1101____| - 1 byte + * Integer encoded as int32_t (4 bytes). + * |1110____| - 1 byte + * Integer encoded as int64_t (8 bytes). */ #include @@ -25,25 +71,20 @@ int ll2string(char *s, size_t len, long long value); -/* Important note: the ZIP_END value is used to depict the end of the - * ziplist structure. When a pointer contains an entry, the first couple - * of bytes contain the encoded length of the previous entry. 
This length - * is encoded as ZIP_ENC_RAW length, so the first two bits will contain 00 - * and the byte will therefore never have a value of 255. */ #define ZIP_END 255 #define ZIP_BIGLEN 254 -/* Entry encoding */ -#define ZIP_ENC_RAW 0 -#define ZIP_ENC_INT16 1 -#define ZIP_ENC_INT32 2 -#define ZIP_ENC_INT64 3 -#define ZIP_ENCODING(p) ((p)[0] >> 6) +/* Different encoding/length possibilities */ +#define ZIP_STR_06B (0 << 6) +#define ZIP_STR_14B (1 << 6) +#define ZIP_STR_32B (2 << 6) +#define ZIP_INT_16B (0xc0 | 0<<4) +#define ZIP_INT_32B (0xc0 | 1<<4) +#define ZIP_INT_64B (0xc0 | 2<<4) -/* Length encoding for raw entries */ -#define ZIP_LEN_INLINE 0 -#define ZIP_LEN_UINT16 1 -#define ZIP_LEN_UINT32 2 +/* Macros to determine type */ +#define ZIP_IS_STR(enc) (((enc) & 0xc0) < 0xc0) +#define ZIP_IS_INT(enc) (!ZIP_IS_STR(enc) && ((enc) & 0x30) < 0x30) /* Utility macros */ #define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl))) @@ -67,14 +108,25 @@ typedef struct zlentry { unsigned char *p; } zlentry; +/* Return the encoding pointed to by 'p'. */ +static unsigned int zipEntryEncoding(unsigned char *p) { + /* String encoding: 2 MSBs */ + unsigned char b = p[0] & 0xc0; + if (b < 0xc0) { + return b; + } else { + /* Integer encoding: 4 MSBs */ + return p[0] & 0xf0; + } + assert(NULL); +} + /* Return bytes needed to store integer encoded by 'encoding' */ -static unsigned int zipEncodingSize(unsigned char encoding) { - if (encoding == ZIP_ENC_INT16) { - return sizeof(int16_t); - } else if (encoding == ZIP_ENC_INT32) { - return sizeof(int32_t); - } else if (encoding == ZIP_ENC_INT64) { - return sizeof(int64_t); +static unsigned int zipIntSize(unsigned char encoding) { + switch(encoding) { + case ZIP_INT_16B: return sizeof(int16_t); + case ZIP_INT_32B: return sizeof(int32_t); + case ZIP_INT_64B: return sizeof(int64_t); } assert(NULL); } @@ -82,23 +134,28 @@ static unsigned int zipEncodingSize(unsigned char encoding) { /* Decode the encoded length pointed by 'p'. 
If a pointer to 'lensize' is * provided, it is set to the number of bytes required to encode the length. */ static unsigned int zipDecodeLength(unsigned char *p, unsigned int *lensize) { - unsigned char encoding = ZIP_ENCODING(p), lenenc; + unsigned char encoding = zipEntryEncoding(p); unsigned int len; - if (encoding == ZIP_ENC_RAW) { - lenenc = (p[0] >> 4) & 0x3; - if (lenenc == ZIP_LEN_INLINE) { - len = p[0] & 0xf; + if (ZIP_IS_STR(encoding)) { + switch(encoding) { + case ZIP_STR_06B: + len = p[0] & 0x3f; if (lensize) *lensize = 1; - } else if (lenenc == ZIP_LEN_UINT16) { - len = p[1] | (p[2] << 8); - if (lensize) *lensize = 3; - } else { - len = p[1] | (p[2] << 8) | (p[3] << 16) | (p[4] << 24); + break; + case ZIP_STR_14B: + len = ((p[0] & 0x3f) << 6) | p[1]; + if (lensize) *lensize = 2; + break; + case ZIP_STR_32B: + len = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4]; if (lensize) *lensize = 5; + break; + default: + assert(NULL); } } else { - len = zipEncodingSize(encoding); + len = zipIntSize(encoding); if (lensize) *lensize = 1; } return len; @@ -106,34 +163,36 @@ static unsigned int zipDecodeLength(unsigned char *p, unsigned int *lensize) { /* Encode the length 'l' writing it in 'p'. If p is NULL it just returns * the amount of bytes required to encode such a length. */ -static unsigned int zipEncodeLength(unsigned char *p, char encoding, unsigned int rawlen) { - unsigned char len = 1, lenenc, buf[5]; - if (encoding == ZIP_ENC_RAW) { - if (rawlen <= 0xf) { +static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) { + unsigned char len = 1, buf[5]; + + if (ZIP_IS_STR(encoding)) { + /* Although encoding is given it may not be set for strings, + * so we determine it here using the raw length. 
*/ + if (rawlen <= 0x3f) { if (!p) return len; - lenenc = ZIP_LEN_INLINE; - buf[0] = rawlen; - } else if (rawlen <= 0xffff) { - len += 2; + buf[0] = ZIP_STR_06B | rawlen; + } else if (rawlen <= 0x3fff) { + len += 1; if (!p) return len; - lenenc = ZIP_LEN_UINT16; - buf[1] = (rawlen ) & 0xff; - buf[2] = (rawlen >> 8) & 0xff; + buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f); + buf[1] = rawlen & 0xff; } else { len += 4; if (!p) return len; - lenenc = ZIP_LEN_UINT32; - buf[1] = (rawlen ) & 0xff; - buf[2] = (rawlen >> 8) & 0xff; - buf[3] = (rawlen >> 16) & 0xff; - buf[4] = (rawlen >> 24) & 0xff; + buf[0] = ZIP_STR_32B; + buf[1] = (rawlen >> 24) & 0xff; + buf[2] = (rawlen >> 16) & 0xff; + buf[3] = (rawlen >> 8) & 0xff; + buf[4] = rawlen & 0xff; } - buf[0] = (lenenc << 4) | (buf[0] & 0xf); + } else { + /* Implies integer encoding, so length is always 1. */ + if (!p) return len; + buf[0] = encoding; } - if (!p) return len; - /* Apparently we need to store the length in 'p' */ - buf[0] = (encoding << 6) | (buf[0] & 0x3f); + /* Store this length at p */ memcpy(p,buf,len); return len; } @@ -198,11 +257,11 @@ static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long /* Great, the string can be encoded. Check what's the smallest * of our encoding types that can hold this value. 
*/ if (value >= INT16_MIN && value <= INT16_MAX) { - *encoding = ZIP_ENC_INT16; + *encoding = ZIP_INT_16B; } else if (value >= INT32_MIN && value <= INT32_MAX) { - *encoding = ZIP_ENC_INT32; + *encoding = ZIP_INT_32B; } else { - *encoding = ZIP_ENC_INT64; + *encoding = ZIP_INT_64B; } *v = value; return 1; @@ -215,13 +274,13 @@ static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encodi int16_t i16; int32_t i32; int64_t i64; - if (encoding == ZIP_ENC_INT16) { + if (encoding == ZIP_INT_16B) { i16 = value; memcpy(p,&i16,sizeof(i16)); - } else if (encoding == ZIP_ENC_INT32) { + } else if (encoding == ZIP_INT_32B) { i32 = value; memcpy(p,&i32,sizeof(i32)); - } else if (encoding == ZIP_ENC_INT64) { + } else if (encoding == ZIP_INT_64B) { i64 = value; memcpy(p,&i64,sizeof(i64)); } else { @@ -234,13 +293,13 @@ static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) { int16_t i16; int32_t i32; int64_t i64, ret; - if (encoding == ZIP_ENC_INT16) { + if (encoding == ZIP_INT_16B) { memcpy(&i16,p,sizeof(i16)); ret = i16; - } else if (encoding == ZIP_ENC_INT32) { + } else if (encoding == ZIP_INT_32B) { memcpy(&i32,p,sizeof(i32)); ret = i32; - } else if (encoding == ZIP_ENC_INT64) { + } else if (encoding == ZIP_INT_64B) { memcpy(&i64,p,sizeof(i64)); ret = i64; } else { @@ -255,7 +314,7 @@ static zlentry zipEntry(unsigned char *p) { e.prevrawlen = zipPrevDecodeLength(p,&e.prevrawlensize); e.len = zipDecodeLength(p+e.prevrawlensize,&e.lensize); e.headersize = e.prevrawlensize+e.lensize; - e.encoding = ZIP_ENCODING(p+e.prevrawlensize); + e.encoding = zipEntryEncoding(p+e.prevrawlensize); e.p = p; return e; } @@ -327,7 +386,7 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig unsigned int curlen = ZIPLIST_BYTES(zl), reqlen, prevlen = 0; unsigned int offset, nextdiff = 0; unsigned char *tail; - unsigned char encoding = ZIP_ENC_RAW; + unsigned char encoding = 0; long long value; zlentry entry; @@ -344,11 +403,13 @@ 
static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig /* See if the entry can be encoded */ if (zipTryEncoding(s,slen,&value,&encoding)) { - reqlen = zipEncodingSize(encoding); + /* 'encoding' is set to the appropriate integer encoding */ + reqlen = zipIntSize(encoding); } else { + /* 'encoding' is untouched, however zipEncodeLength will use the + * string length to figure out how to encode it. */ reqlen = slen; } - /* We need space for both the length of the previous entry and * the length of the payload. */ reqlen += zipPrevEncodeLength(NULL,prevlen); @@ -380,10 +441,10 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig /* Write the entry */ p += zipPrevEncodeLength(p,prevlen); p += zipEncodeLength(p,encoding,slen); - if (encoding != ZIP_ENC_RAW) { - zipSaveInteger(p,value,encoding); - } else { + if (ZIP_IS_STR(encoding)) { memcpy(p,s,slen); + } else { + zipSaveInteger(p,value,encoding); } ZIPLIST_INCR_LENGTH(zl,1); return zl; @@ -463,7 +524,7 @@ unsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *sl if (sstr) *sstr = NULL; entry = zipEntry(p); - if (entry.encoding == ZIP_ENC_RAW) { + if (ZIP_IS_STR(entry.encoding)) { if (sstr) { *slen = entry.len; *sstr = p+entry.headersize; @@ -510,7 +571,7 @@ unsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int if (p[0] == ZIP_END) return 0; entry = zipEntry(p); - if (entry.encoding == ZIP_ENC_RAW) { + if (ZIP_IS_STR(entry.encoding)) { /* Raw compare */ if (entry.len == slen) { return memcmp(p+entry.headersize,sstr,slen) == 0; @@ -562,7 +623,7 @@ void ziplistRepr(unsigned char *zl) { entry = zipEntry(p); printf("{offset %ld, header %u, payload %u} ",p-zl,entry.headersize,entry.len); p += entry.headersize; - if (entry.encoding == ZIP_ENC_RAW) { + if (ZIP_IS_STR(entry.encoding)) { fwrite(p,entry.len,1,stdout); } else { printf("%lld", (long long) zipLoadInteger(p,entry.encoding)); From 
87c74dfaa81d204d47ca9ae9e3f76ea521ca8460 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 20 Aug 2010 13:42:42 +0200 Subject: [PATCH 031/139] Check if strtoll return value was clamped --- src/object.c | 1 + tests/unit/type/set.tcl | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/src/object.c b/src/object.c index 45dde52b..b16123eb 100644 --- a/src/object.c +++ b/src/object.c @@ -375,6 +375,7 @@ int getLongLongFromObject(robj *o, long long *target) { redisAssert(o->type == REDIS_STRING); if (o->encoding == REDIS_ENCODING_RAW) { value = strtoll(o->ptr, &eptr, 10); + if (errno == ERANGE) return REDIS_ERR; if (eptr[0] != '\0') return REDIS_ERR; } else if (o->encoding == REDIS_ENCODING_INT) { value = (long)o->ptr; diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index a1b655ef..056ed27c 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -45,6 +45,12 @@ start_server { assert_encoding hashtable myset } + test "SADD an integer larger than 64 bits" { + create_set myset {213244124402402314402033402} + assert_encoding hashtable myset + assert_equal 1 [r sismember myset 213244124402402314402033402] + } + test "SADD overflows the maximum allowed integers in an intset" { r del myset for {set i 0} {$i < 512} {incr i} { r sadd myset $i } From a53ebb4c8e7d209ff327b1ae9270b39e72424b2c Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sat, 21 Aug 2010 10:54:31 +0200 Subject: [PATCH 032/139] Don't abort test suite when the server block has a return value --- tests/support/server.tcl | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/tests/support/server.tcl b/tests/support/server.tcl index 8e226a7d..24fef467 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -230,7 +230,11 @@ proc start_server {options {code undefined}} { # execute provided block set curnum $::testnum - catch { uplevel 1 $code } err + if {![catch { uplevel 1 $code } err]} { + # zero exit
status is good + unset err + } + if {$curnum == $::testnum} { # don't check for leaks when no tests were executed dict set srv "skipleaks" 1 @@ -241,22 +245,24 @@ proc start_server {options {code undefined}} { # allow an exception to bubble up the call chain but still kill this # server, because we want to reuse the ports when the tests are re-run - if {$err eq "exception"} { - puts [format "Logged warnings (pid %d):" [dict get $srv "pid"]] - set warnings [warnings_from_file [dict get $srv "stdout"]] - if {[string length $warnings] > 0} { - puts "$warnings" - } else { - puts "(none)" + if {[info exists err]} { + if {$err eq "exception"} { + puts [format "Logged warnings (pid %d):" [dict get $srv "pid"]] + set warnings [warnings_from_file [dict get $srv "stdout"]] + if {[string length $warnings] > 0} { + puts "$warnings" + } else { + puts "(none)" + } + # kill this server without checking for leaks + dict set srv "skipleaks" 1 + kill_server $srv + error "exception" + } elseif {[string length $err] > 0} { + puts "Error executing the suite, aborting..." + puts $err + exit 1 } - # kill this server without checking for leaks - dict set srv "skipleaks" 1 - kill_server $srv - error "exception" - } elseif {[string length $err] > 0} { - puts "Error executing the suite, aborting..." 
- puts $err - exit 1 } set ::tags [lrange $::tags 0 end-[llength $tags]] From 5d4f3a8c85e528b47d125a588f6255eeb1836e0f Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sat, 21 Aug 2010 10:55:53 +0200 Subject: [PATCH 033/139] Move SORT tests around --- tests/unit/sort.tcl | 293 +++++++++++++++++++++----------------------- 1 file changed, 141 insertions(+), 152 deletions(-) diff --git a/tests/unit/sort.tcl b/tests/unit/sort.tcl index 16a02b3a..554477d1 100644 --- a/tests/unit/sort.tcl +++ b/tests/unit/sort.tcl @@ -1,5 +1,88 @@ -start_server {tags {"sort"}} { - test {SORT ALPHA against integer encoded strings} { +start_server { + tags {"sort"} + overrides { + "list-max-ziplist-value" 16 + "list-max-ziplist-entries" 32 + } +} { + proc create_random_dataset {num cmd} { + set tosort {} + set result {} + array set seenrand {} + r del tosort + for {set i 0} {$i < $num} {incr i} { + # Make sure all the weights are different because + # Redis does not use a stable sort but Tcl does. + while 1 { + randpath { + set rint [expr int(rand()*1000000)] + } { + set rint [expr rand()] + } + if {![info exists seenrand($rint)]} break + } + set seenrand($rint) x + r $cmd tosort $i + r set weight_$i $rint + r hset wobj_$i weight $rint + lappend tosort [list $i $rint] + } + set sorted [lsort -index 1 -real $tosort] + for {set i 0} {$i < $num} {incr i} { + lappend result [lindex $sorted $i 0] + } + set _ $result + } + + set result [create_random_dataset 16 lpush] + test "SORT BY key" { + assert_equal $result [r sort tosort {BY weight_*}] + } + + test "SORT BY hash field" { + assert_equal $result [r sort tosort {BY wobj_*->weight}] + } + + test "SORT GET #" { + assert_equal [lsort -integer $result] [r sort tosort GET #] + } + + test "SORT GET " { + r del foo + set res [r sort tosort GET foo] + assert_equal 16 [llength $res] + foreach item $res { assert_equal {} $item } + } + + test "SORT GET (key and hash) with sanity check" { + set l1 [r sort tosort GET # GET weight_*] + set l2 [r sort 
tosort GET # GET wobj_*->weight] + foreach {id1 w1} $l1 {id2 w2} $l2 { + assert_equal $id1 $id2 + assert_equal $w1 [r get weight_$id1] + assert_equal $w2 [r get weight_$id1] + } + } + + test "SORT BY key STORE" { + r sort tosort {BY weight_*} store sort-res + assert_equal $result [r lrange sort-res 0 -1] + assert_equal 16 [r llen sort-res] + assert_encoding ziplist sort-res + } + + test "SORT BY hash field STORE" { + r sort tosort {BY wobj_*->weight} store sort-res + assert_equal $result [r lrange sort-res 0 -1] + assert_equal 16 [r llen sort-res] + assert_encoding ziplist sort-res + } + + test "SORT DESC" { + assert_equal [lsort -decreasing -integer $result] [r sort tosort {DESC}] + } + + test "SORT ALPHA against integer encoded strings" { r del mylist r lpush mylist 2 r lpush mylist 1 @@ -8,155 +91,7 @@ start_server {tags {"sort"}} { r sort mylist alpha } {1 10 2 3} - tags {"slow"} { - set res {} - test {Create a random list and a random set} { - set tosort {} - array set seenrand {} - for {set i 0} {$i < 10000} {incr i} { - while 1 { - # Make sure all the weights are different because - # Redis does not use a stable sort but Tcl does. 
- randpath { - set rint [expr int(rand()*1000000)] - } { - set rint [expr rand()] - } - if {![info exists seenrand($rint)]} break - } - set seenrand($rint) x - r lpush tosort $i - r sadd tosort-set $i - r set weight_$i $rint - r hset wobj_$i weight $rint - lappend tosort [list $i $rint] - } - set sorted [lsort -index 1 -real $tosort] - for {set i 0} {$i < 10000} {incr i} { - lappend res [lindex $sorted $i 0] - } - format {} - } {} - - test {SORT with BY against the newly created list} { - r sort tosort {BY weight_*} - } $res - - test {SORT with BY (hash field) against the newly created list} { - r sort tosort {BY wobj_*->weight} - } $res - - test {SORT with GET (key+hash) with sanity check of each element (list)} { - set err {} - set l1 [r sort tosort GET # GET weight_*] - set l2 [r sort tosort GET # GET wobj_*->weight] - foreach {id1 w1} $l1 {id2 w2} $l2 { - set realweight [r get weight_$id1] - if {$id1 != $id2} { - set err "ID mismatch $id1 != $id2" - break - } - if {$realweight != $w1 || $realweight != $w2} { - set err "Weights mismatch! 
w1: $w1 w2: $w2 real: $realweight" - break - } - } - set _ $err - } {} - - test {SORT with BY, but against the newly created set} { - r sort tosort-set {BY weight_*} - } $res - - test {SORT with BY (hash field), but against the newly created set} { - r sort tosort-set {BY wobj_*->weight} - } $res - - test {SORT with BY and STORE against the newly created list} { - r sort tosort {BY weight_*} store sort-res - r lrange sort-res 0 -1 - } $res - - test {SORT with BY (hash field) and STORE against the newly created list} { - r sort tosort {BY wobj_*->weight} store sort-res - r lrange sort-res 0 -1 - } $res - - test {SORT direct, numeric, against the newly created list} { - r sort tosort - } [lsort -integer $res] - - test {SORT decreasing sort} { - r sort tosort {DESC} - } [lsort -decreasing -integer $res] - - test {SORT speed, sorting 10000 elements list using BY, 100 times} { - set start [clock clicks -milliseconds] - for {set i 0} {$i < 100} {incr i} { - set sorted [r sort tosort {BY weight_* LIMIT 0 10}] - } - set elapsed [expr [clock clicks -milliseconds]-$start] - puts -nonewline "\n Average time to sort: [expr double($elapsed)/100] milliseconds " - flush stdout - format {} - } {} - - test {SORT speed, as above but against hash field} { - set start [clock clicks -milliseconds] - for {set i 0} {$i < 100} {incr i} { - set sorted [r sort tosort {BY wobj_*->weight LIMIT 0 10}] - } - set elapsed [expr [clock clicks -milliseconds]-$start] - puts -nonewline "\n Average time to sort: [expr double($elapsed)/100] milliseconds " - flush stdout - format {} - } {} - - test {SORT speed, sorting 10000 elements list directly, 100 times} { - set start [clock clicks -milliseconds] - for {set i 0} {$i < 100} {incr i} { - set sorted [r sort tosort {LIMIT 0 10}] - } - set elapsed [expr [clock clicks -milliseconds]-$start] - puts -nonewline "\n Average time to sort: [expr double($elapsed)/100] milliseconds " - flush stdout - format {} - } {} - - test {SORT speed, pseudo-sorting 10000 
elements list, BY , 100 times} { - set start [clock clicks -milliseconds] - for {set i 0} {$i < 100} {incr i} { - set sorted [r sort tosort {BY nokey LIMIT 0 10}] - } - set elapsed [expr [clock clicks -milliseconds]-$start] - puts -nonewline "\n Average time to sort: [expr double($elapsed)/100] milliseconds " - flush stdout - format {} - } {} - } - - test {SORT regression for issue #19, sorting floats} { - r flushdb - foreach x {1.1 5.10 3.10 7.44 2.1 5.75 6.12 0.25 1.15} { - r lpush mylist $x - } - r sort mylist - } [lsort -real {1.1 5.10 3.10 7.44 2.1 5.75 6.12 0.25 1.15}] - - test {SORT with GET #} { - r del mylist - r lpush mylist 1 - r lpush mylist 2 - r lpush mylist 3 - r mset weight_1 10 weight_2 5 weight_3 30 - r sort mylist BY weight_* GET # - } {2 1 3} - - test {SORT with constant GET} { - r sort mylist GET foo - } {{} {} {}} - - test {SORT against sorted sets} { + test "SORT sorted set" { r del zset r zadd zset 1 a r zadd zset 5 b @@ -166,7 +101,7 @@ start_server {tags {"sort"}} { r sort zset alpha desc } {e d c b a} - test {Sorted sets +inf and -inf handling} { + test "SORT sorted set: +inf and -inf handling" { r del zset r zadd zset -100 a r zadd zset 200 b @@ -176,4 +111,58 @@ start_server {tags {"sort"}} { r zadd zset -inf min r zrange zset 0 -1 } {min c a b d max} + + test "SORT regression for issue #19, sorting floats" { + r flushdb + set floats {1.1 5.10 3.10 7.44 2.1 5.75 6.12 0.25 1.15} + foreach x $floats { + r lpush mylist $x + } + assert_equal [lsort -real $floats] [r sort mylist] + } + + tags {"slow"} { + set num 100 + set res [create_random_dataset $num lpush] + + test "SORT speed, $num element list BY key, 100 times" { + set start [clock clicks -milliseconds] + for {set i 0} {$i < 100} {incr i} { + set sorted [r sort tosort {BY weight_* LIMIT 0 10}] + } + set elapsed [expr [clock clicks -milliseconds]-$start] + puts -nonewline "\n Average time to sort: [expr double($elapsed)/100] milliseconds " + flush stdout + } + + test "SORT speed, $num 
element list BY hash field, 100 times" { + set start [clock clicks -milliseconds] + for {set i 0} {$i < 100} {incr i} { + set sorted [r sort tosort {BY wobj_*->weight LIMIT 0 10}] + } + set elapsed [expr [clock clicks -milliseconds]-$start] + puts -nonewline "\n Average time to sort: [expr double($elapsed)/100] milliseconds " + flush stdout + } + + test "SORT speed, $num element list directly, 100 times" { + set start [clock clicks -milliseconds] + for {set i 0} {$i < 100} {incr i} { + set sorted [r sort tosort {LIMIT 0 10}] + } + set elapsed [expr [clock clicks -milliseconds]-$start] + puts -nonewline "\n Average time to sort: [expr double($elapsed)/100] milliseconds " + flush stdout + } + + test "SORT speed, $num element list BY , 100 times" { + set start [clock clicks -milliseconds] + for {set i 0} {$i < 100} {incr i} { + set sorted [r sort tosort {BY nokey LIMIT 0 10}] + } + set elapsed [expr [clock clicks -milliseconds]-$start] + puts -nonewline "\n Average time to sort: [expr double($elapsed)/100] milliseconds " + flush stdout + } + } } From ced6709cb9f4db367c58035f1ccb4a3ddd9c21cf Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sat, 21 Aug 2010 11:02:22 +0200 Subject: [PATCH 034/139] Make SORT tests use both ziplists and linked lists as input --- tests/unit/sort.tcl | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/unit/sort.tcl b/tests/unit/sort.tcl index 554477d1..8f2710cd 100644 --- a/tests/unit/sort.tcl +++ b/tests/unit/sort.tcl @@ -34,15 +34,23 @@ start_server { set _ $result } + foreach {num cmd enc title} { + 16 lpush ziplist "Ziplist" + 64 lpush linkedlist "Linked list" + } { + set result [create_random_dataset $num $cmd] + assert_encoding $enc tosort + + test "$title: SORT BY key" { + assert_equal $result [r sort tosort {BY weight_*}] + } + + test "$title: SORT BY hash field" { + assert_equal $result [r sort tosort {BY wobj_*->weight}] + } + } + set result [create_random_dataset 16 lpush] - test 
"SORT BY key" { - assert_equal $result [r sort tosort {BY weight_*}] - } - - test "SORT BY hash field" { - assert_equal $result [r sort tosort {BY wobj_*->weight}] - } - test "SORT GET #" { assert_equal [lsort -integer $result] [r sort tosort GET #] } From 2b9a59471fe550e7c006ff9efb1367560be76742 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sat, 21 Aug 2010 11:03:56 +0200 Subject: [PATCH 035/139] SORT tests with hash table encoded set as input --- tests/unit/sort.tcl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/sort.tcl b/tests/unit/sort.tcl index 8f2710cd..46246351 100644 --- a/tests/unit/sort.tcl +++ b/tests/unit/sort.tcl @@ -3,6 +3,7 @@ start_server { overrides { "list-max-ziplist-value" 16 "list-max-ziplist-entries" 32 + "set-max-intset-entries" 32 } } { proc create_random_dataset {num cmd} { @@ -37,6 +38,7 @@ start_server { foreach {num cmd enc title} { 16 lpush ziplist "Ziplist" 64 lpush linkedlist "Linked list" + 64 sadd hashtable "Hash table" } { set result [create_random_dataset $num $cmd] assert_encoding $enc tosort From 029e5577ff01c13d8ddfdf9c6749ac33c9bd9e9f Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sat, 21 Aug 2010 11:15:31 +0200 Subject: [PATCH 036/139] Make SORT use the hybrid set accessors to allow sorting intsets --- src/sort.c | 28 ++++++++++++++++------------ tests/unit/sort.tcl | 1 + 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/sort.c b/src/sort.c index 4295a6ec..06696cc5 100644 --- a/src/sort.c +++ b/src/sort.c @@ -202,7 +202,7 @@ void sortCommand(redisClient *c) { /* Load the sorting vector with all the objects to sort */ switch(sortval->type) { case REDIS_LIST: vectorlen = listTypeLength(sortval); break; - case REDIS_SET: vectorlen = dictSize((dict*)sortval->ptr); break; + case REDIS_SET: vectorlen = setTypeSize(sortval); break; case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break; default: vectorlen = 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */ } 
@@ -219,18 +219,20 @@ void sortCommand(redisClient *c) { j++; } listTypeReleaseIterator(li); - } else { - dict *set; + } else if (sortval->type == REDIS_SET) { + setIterator *si = setTypeInitIterator(sortval); + robj *ele; + while((ele = setTypeNext(si)) != NULL) { + vector[j].obj = ele; + vector[j].u.score = 0; + vector[j].u.cmpobj = NULL; + j++; + } + setTypeReleaseIterator(si); + } else if (sortval->type == REDIS_ZSET) { + dict *set = ((zset*)sortval->ptr)->dict; dictIterator *di; dictEntry *setele; - - if (sortval->type == REDIS_SET) { - set = sortval->ptr; - } else { - zset *zs = sortval->ptr; - set = zs->dict; - } - di = dictGetIterator(set); while((setele = dictNext(di)) != NULL) { vector[j].obj = dictGetEntryKey(setele); @@ -239,6 +241,8 @@ void sortCommand(redisClient *c) { j++; } dictReleaseIterator(di); + } else { + redisPanic("Unknown type"); } redisAssert(j == vectorlen); @@ -369,7 +373,7 @@ void sortCommand(redisClient *c) { } /* Cleanup */ - if (sortval->type == REDIS_LIST) + if (sortval->type == REDIS_LIST || sortval->type == REDIS_SET) for (j = 0; j < vectorlen; j++) decrRefCount(vector[j].obj); decrRefCount(sortval); diff --git a/tests/unit/sort.tcl b/tests/unit/sort.tcl index 46246351..bca01737 100644 --- a/tests/unit/sort.tcl +++ b/tests/unit/sort.tcl @@ -38,6 +38,7 @@ start_server { foreach {num cmd enc title} { 16 lpush ziplist "Ziplist" 64 lpush linkedlist "Linked list" + 16 sadd intset "Intset" 64 sadd hashtable "Hash table" } { set result [create_random_dataset $num $cmd] From cb72d0f155cb8faf6aa02f68318ba9b7477447c3 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sat, 21 Aug 2010 11:25:13 +0200 Subject: [PATCH 037/139] Rename iterator to setTypeIterator for consistency --- src/debug.c | 2 +- src/redis.h | 8 ++++---- src/sort.c | 2 +- src/t_set.c | 14 +++++++------- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/debug.c b/src/debug.c index f58fcbae..76d18b21 100644 --- a/src/debug.c +++ b/src/debug.c @@ -119,7 
+119,7 @@ void computeDatasetDigest(unsigned char *final) { } listTypeReleaseIterator(li); } else if (o->type == REDIS_SET) { - setIterator *si = setTypeInitIterator(o); + setTypeIterator *si = setTypeInitIterator(o); robj *ele; while((ele = setTypeNext(si)) != NULL) { xorObjectDigest(digest,ele); diff --git a/src/redis.h b/src/redis.h index 288c9069..0b78320c 100644 --- a/src/redis.h +++ b/src/redis.h @@ -545,7 +545,7 @@ typedef struct { int encoding; int ii; /* intset iterator */ dictIterator *di; -} setIterator; +} setTypeIterator; /* Structure to hold hash iteration abstration. Note that iteration over * hashes involves both fields and values. Because it is possible that @@ -734,9 +734,9 @@ robj *setTypeCreate(robj *value); int setTypeAdd(robj *subject, robj *value); int setTypeRemove(robj *subject, robj *value); int setTypeIsMember(robj *subject, robj *value); -setIterator *setTypeInitIterator(robj *subject); -void setTypeReleaseIterator(setIterator *si); -robj *setTypeNext(setIterator *si); +setTypeIterator *setTypeInitIterator(robj *subject); +void setTypeReleaseIterator(setTypeIterator *si); +robj *setTypeNext(setTypeIterator *si); robj *setTypeRandomElement(robj *subject); unsigned long setTypeSize(robj *subject); void setTypeConvert(robj *subject, int enc); diff --git a/src/sort.c b/src/sort.c index 06696cc5..aa1ce929 100644 --- a/src/sort.c +++ b/src/sort.c @@ -220,7 +220,7 @@ void sortCommand(redisClient *c) { } listTypeReleaseIterator(li); } else if (sortval->type == REDIS_SET) { - setIterator *si = setTypeInitIterator(sortval); + setTypeIterator *si = setTypeInitIterator(sortval); robj *ele; while((ele = setTypeNext(si)) != NULL) { vector[j].obj = ele; diff --git a/src/t_set.c b/src/t_set.c index bcb8dd3f..01c851ba 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -80,8 +80,8 @@ int setTypeIsMember(robj *subject, robj *value) { return 0; } -setIterator *setTypeInitIterator(robj *subject) { - setIterator *si = zmalloc(sizeof(setIterator)); +setTypeIterator 
*setTypeInitIterator(robj *subject) { + setTypeIterator *si = zmalloc(sizeof(setIterator)); si->subject = subject; si->encoding = subject->encoding; if (si->encoding == REDIS_ENCODING_HT) { @@ -94,7 +94,7 @@ setIterator *setTypeInitIterator(robj *subject) { return si; } -void setTypeReleaseIterator(setIterator *si) { +void setTypeReleaseIterator(setTypeIterator *si) { if (si->encoding == REDIS_ENCODING_HT) dictReleaseIterator(si->di); zfree(si); @@ -103,7 +103,7 @@ void setTypeReleaseIterator(setIterator *si) { /* Move to the next entry in the set. Returns the object at the current * position, or NULL when the end is reached. This object will have its * refcount incremented, so the caller needs to take care of this. */ -robj *setTypeNext(setIterator *si) { +robj *setTypeNext(setTypeIterator *si) { robj *ret = NULL; if (si->encoding == REDIS_ENCODING_HT) { dictEntry *de = dictNext(si->di); @@ -151,7 +151,7 @@ unsigned long setTypeSize(robj *subject) { * to a hashtable) is presized to hold the number of elements in the original * set. 
*/ void setTypeConvert(robj *subject, int enc) { - setIterator *si; + setTypeIterator *si; robj *element; redisAssert(subject->type == REDIS_SET); @@ -319,7 +319,7 @@ int qsortCompareSetsByCardinality(const void *s1, const void *s2) { void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum, robj *dstkey) { robj **sets = zmalloc(sizeof(robj*)*setnum); - setIterator *si; + setTypeIterator *si; robj *ele, *lenobj = NULL, *dstset = NULL; unsigned long j, cardinality = 0; @@ -419,7 +419,7 @@ void sinterstoreCommand(redisClient *c) { void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op) { robj **sets = zmalloc(sizeof(robj*)*setnum); - setIterator *si; + setTypeIterator *si; robj *ele, *dstset = NULL; int j, cardinality = 0; From c91abdcd077f868a59290bc9d68fba3130a3121d Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 23 Aug 2010 17:06:38 +0200 Subject: [PATCH 038/139] Fixed overflow detection in argument to long convertion function in general, and in expire/ttl pairs specifically, addressing issue 54 --- src/db.c | 7 +++---- src/object.c | 4 +++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/db.c b/src/db.c index 0dec95b1..6d287d72 100644 --- a/src/db.c +++ b/src/db.c @@ -514,15 +514,14 @@ void expireatCommand(redisClient *c) { } void ttlCommand(redisClient *c) { - time_t expire; - int ttl = -1; + time_t expire, ttl = -1; expire = getExpire(c->db,c->argv[1]); if (expire != -1) { - ttl = (int) (expire-time(NULL)); + ttl = (expire-time(NULL)); if (ttl < 0) ttl = -1; } - addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl)); + addReplyLongLong(c,(long long)ttl); } void persistCommand(redisClient *c) { diff --git a/src/object.c b/src/object.c index 21268340..429ac0ec 100644 --- a/src/object.c +++ b/src/object.c @@ -358,6 +358,8 @@ int getLongLongFromObject(robj *o, long long *target) { if (o->encoding == REDIS_ENCODING_RAW) { value = strtoll(o->ptr, &eptr, 10); if (eptr[0] != '\0') return 
REDIS_ERR; + if (errno == ERANGE && (value == LLONG_MIN || value == LLONG_MAX)) + return REDIS_ERR; } else if (o->encoding == REDIS_ENCODING_INT) { value = (long)o->ptr; } else { @@ -375,7 +377,7 @@ int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, con if (msg != NULL) { addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg)); } else { - addReplySds(c, sdsnew("-ERR value is not an integer\r\n")); + addReplySds(c, sdsnew("-ERR value is not an integer or out of range\r\n")); } return REDIS_ERR; } From e19387302522a81d987bedef98d8961dd7ff06a9 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Aug 2010 10:10:01 +0200 Subject: [PATCH 039/139] changed the comments on top of redis-copy.rb to reflect what the program really does --- utils/redis-copy.rb | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/utils/redis-copy.rb b/utils/redis-copy.rb index af214b79..d892e377 100644 --- a/utils/redis-copy.rb +++ b/utils/redis-copy.rb @@ -1,12 +1,10 @@ -# redis-sha1.rb - Copyright (C) 2009 Salvatore Sanfilippo +# redis-copy.rb - Copyright (C) 2009-2010 Salvatore Sanfilippo # BSD license, See the COPYING file for more information. # -# Performs the SHA1 sum of the whole datset. -# This is useful to spot bugs in persistence related code and to make sure -# Slaves and Masters are in SYNC. +# Copy the whole dataset from one Redis instance to another one # -# If you hack this code make sure to sort keys and set elements as this are -# unsorted elements. Otherwise the sum may differ with equal dataset. +# WARNING: currently hashes and sorted sets are not supported! This +# program should be updated. 
require 'rubygems' require 'redis' From a679185aa515e2f52d8a0f66c3972eb8f43d7fae Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Aug 2010 11:45:05 +0200 Subject: [PATCH 040/139] sanity check for the bulk argument in protocol parsing code, fixing issue 146 --- src/redis.c | 17 +++++++++++++---- src/redis.h | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/redis.c b/src/redis.c index 1a581a92..eade7868 100644 --- a/src/redis.c +++ b/src/redis.c @@ -912,9 +912,14 @@ int processCommand(redisClient *c) { resetClient(c); return 1; } else { - int bulklen = atoi(((char*)c->argv[0]->ptr)+1); + char *eptr; + long bulklen = strtol(((char*)c->argv[0]->ptr)+1,&eptr,10); + int perr = eptr[0] != '\0'; + decrRefCount(c->argv[0]); - if (bulklen < 0 || bulklen > 1024*1024*1024) { + if (perr || bulklen == LONG_MIN || bulklen == LONG_MAX || + bulklen < 0 || bulklen > 1024*1024*1024) + { c->argc--; addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n")); resetClient(c); @@ -984,10 +989,14 @@ int processCommand(redisClient *c) { return 1; } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) { /* This is a bulk command, we have to read the last argument yet. */ - int bulklen = atoi(c->argv[c->argc-1]->ptr); + char *eptr; + long bulklen = strtol(c->argv[c->argc-1]->ptr,&eptr,10); + int perr = eptr[0] != '\0'; decrRefCount(c->argv[c->argc-1]); - if (bulklen < 0 || bulklen > 1024*1024*1024) { + if (perr || bulklen == LONG_MAX || bulklen == LONG_MIN || + bulklen < 0 || bulklen > 1024*1024*1024) + { c->argc--; addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n")); resetClient(c); diff --git a/src/redis.h b/src/redis.h index 781fb209..c35fe53a 100644 --- a/src/redis.h +++ b/src/redis.h @@ -283,7 +283,7 @@ typedef struct redisClient { sds querybuf; robj **argv, **mbargv; int argc, mbargc; - int bulklen; /* bulk read len. -1 if not in bulk read mode */ + long bulklen; /* bulk read len. 
-1 if not in bulk read mode */ int multibulk; /* multi bulk command format active */ list *reply; int sentlen; From 01daeecee7a93b92e10347fc2613b8ee22de751e Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Aug 2010 11:49:05 +0200 Subject: [PATCH 041/139] added tests for invalid bulk argument --- tests/unit/protocol.tcl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/protocol.tcl b/tests/unit/protocol.tcl index 8717cd9f..9eebf77f 100644 --- a/tests/unit/protocol.tcl +++ b/tests/unit/protocol.tcl @@ -27,6 +27,13 @@ start_server {} { gets $fd } {*invalid bulk*count*} + test {bulk payload is not a number} { + set fd [r channel] + puts -nonewline $fd "SET x blabla\r\n" + flush $fd + gets $fd + } {*invalid bulk*count*} + test {Multi bulk request not followed by bulk args} { set fd [r channel] puts -nonewline $fd "*1\r\nfoo\r\n" From e452436a07224022df17c59d6dbfbd47dcfc7fd6 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Aug 2010 12:10:59 +0200 Subject: [PATCH 042/139] BLPOPping clients are no longer subject to connection timeouts, fixing issues 155 --- src/networking.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/networking.c b/src/networking.c index e5a66984..10b9580e 100644 --- a/src/networking.c +++ b/src/networking.c @@ -466,6 +466,7 @@ void closeTimedoutClients(void) { if (server.maxidletime && !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */ !(c->flags & REDIS_MASTER) && /* no timeout for masters */ + !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */ dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */ listLength(c->pubsub_patterns) == 0 && (now - c->lastinteraction > server.maxidletime)) From 778b2210a939083070abaea4b7fc62ebf2ad9bfb Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Aug 2010 16:04:13 +0200 Subject: [PATCH 043/139] slave with attached slaves now close the conection to all the slaves when the connection to the master is lost. 
Now a slave without a connected link to the master will refuse SYNC from other slaves. Enhanced the replication error reporting. All this will fix Issue 156 --- src/networking.c | 14 +++++++++++++- src/replication.c | 14 +++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 10b9580e..a39be7c4 100644 --- a/src/networking.c +++ b/src/networking.c @@ -255,7 +255,8 @@ void freeClient(redisClient *c) { server.vm_blocked_clients--; } listRelease(c->io_keys); - /* Master/slave cleanup */ + /* Master/slave cleanup. + * Case 1: we lost the connection with a slave. */ if (c->flags & REDIS_SLAVE) { if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1) close(c->repldbfd); @@ -264,9 +265,20 @@ void freeClient(redisClient *c) { redisAssert(ln != NULL); listDelNode(l,ln); } + + /* Case 2: we lost the connection with the master. */ if (c->flags & REDIS_MASTER) { server.master = NULL; server.replstate = REDIS_REPL_CONNECT; + /* Since we lost the connection with the master, we should also + * close the connection with all our slaves if we have any, so + * when we'll resync with the master the other slaves will sync again + * with us as well. Note that also when the slave is not connected + * to the master it will keep refusing connections by other slaves. */ + while (listLength(server.slaves)) { + ln = listFirst(server.slaves); + freeClient((redisClient*)ln->value); + } } /* Release memory */ zfree(c->argv); diff --git a/src/replication.c b/src/replication.c index 5387db91..89375820 100644 --- a/src/replication.c +++ b/src/replication.c @@ -176,6 +176,13 @@ void syncCommand(redisClient *c) { /* ignore SYNC if aleady slave or in monitor mode */ if (c->flags & REDIS_SLAVE) return; + /* Refuse SYNC requests if we are a slave but the link with our master + * is not ok... 
*/ + if (server.masterhost && server.replstate != REDIS_REPL_CONNECTED) { + addReplySds(c,sdsnew("-ERR Can't SYNC while not connected with my master\r\n")); + return; + } + /* SYNC can't be issued when the server has pending data to send to * the client about already issued commands. We need a fresh reply * buffer registering the differences between the BGSAVE and the current @@ -392,7 +399,12 @@ int syncWithMaster(void) { strerror(errno)); return REDIS_ERR; } - if (buf[0] != '$') { + if (buf[0] == '-') { + close(fd); + redisLog(REDIS_WARNING,"MASTER aborted replication with an error: %s", + buf+1); + return REDIS_ERR; + } else if (buf[0] != '$') { close(fd); redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); return REDIS_ERR; From b91d605a35c294573f0213c89c421d09b538c2b6 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Aug 2010 16:25:00 +0200 Subject: [PATCH 044/139] slave now detect lost connection during SYNC, fixing Issue 173 --- src/replication.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 89375820..363ce54a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -428,9 +428,9 @@ int syncWithMaster(void) { int nread, nwritten; nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024); - if (nread == -1) { + if (nread <= 0) { redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s", - strerror(errno)); + (nread == -1) ? strerror(errno) : "connection lost"); close(fd); close(dfd); return REDIS_ERR; From 695fe87456ac4e5ed14e4a853b9cce61fb3e5975 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Aug 2010 17:09:25 +0200 Subject: [PATCH 045/139] The pid file is now created only after the server is correctly initialied. It is also removed on sigterm and when the stack trace is produced after a sigbus or a sigsegv. 
This two changes should fix the Issue 175 --- src/redis.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/redis.c b/src/redis.c index eade7868..0ee7a20b 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1085,11 +1085,7 @@ int prepareForShutdown() { if (server.vm_enabled) unlink(server.vm_swap_file); } else { /* Snapshotting. Perform a SYNC SAVE and exit */ - if (rdbSave(server.dbfilename) == REDIS_OK) { - if (server.daemonize) - unlink(server.pidfile); - redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory()); - } else { + if (rdbSave(server.dbfilename) != REDIS_OK) { /* Ooops.. error saving! The best we can do is to continue * operating. Note that if there was a background saving process, * in the next cron() Redis will be notified that the background @@ -1099,6 +1095,7 @@ int prepareForShutdown() { return REDIS_ERR; } } + if (server.daemonize) unlink(server.pidfile); redisLog(REDIS_WARNING,"Server exit now, bye bye..."); return REDIS_OK; } @@ -1371,9 +1368,17 @@ void linuxOvercommitMemoryWarning(void) { } #endif /* __linux__ */ +void createPidFile(void) { + /* Try to write the pid file in a best-effort way. 
*/ + FILE *fp = fopen(server.pidfile,"w"); + if (fp) { + fprintf(fp,"%d\n",getpid()); + fclose(fp); + } +} + void daemonize(void) { int fd; - FILE *fp; if (fork() != 0) exit(0); /* parent exits */ setsid(); /* create a new session */ @@ -1387,12 +1392,6 @@ void daemonize(void) { dup2(fd, STDERR_FILENO); if (fd > STDERR_FILENO) close(fd); } - /* Try to write the pid file */ - fp = fopen(server.pidfile,"w"); - if (fp) { - fprintf(fp,"%d\n",getpid()); - fclose(fp); - } } void version() { @@ -1425,6 +1424,7 @@ int main(int argc, char **argv) { } if (server.daemonize) daemonize(); initServer(); + if (server.daemonize) createPidFile(); redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION); #ifdef __linux__ linuxOvercommitMemoryWarning(); @@ -1501,6 +1501,7 @@ void segvHandler(int sig, siginfo_t *info, void *secret) { redisLog(REDIS_WARNING,"%s", messages[i]); /* free(messages); Don't call free() with possibly corrupted memory. */ + if (server.daemonize) unlink(server.pidfile); _exit(0); } From b37ca6edb10faa0ebcf54a7d23cee31d895fe5b1 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Aug 2010 18:08:09 +0200 Subject: [PATCH 046/139] Issue 179 fixed, now redis-cli is able to parse correctly multi bulk replies with elements that are errors --- src/redis-cli.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b4a10890..007ebcde 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -169,6 +169,7 @@ static int cliReadBulkReply(int fd) { static int cliReadMultiBulkReply(int fd) { sds replylen = cliReadLine(fd); int elements, c = 1; + int retval = 0; if (replylen == NULL) return 1; elements = atoi(replylen); @@ -182,10 +183,10 @@ static int cliReadMultiBulkReply(int fd) { } while(elements--) { printf("%d. 
", c); - if (cliReadReply(fd)) return 1; + if (cliReadReply(fd)) retval = 1; c++; } - return 0; + return retval; } static int cliReadReply(int fd) { From c0b3d42372dbe67c6ef096372869e2b60d4a1cdc Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Aug 2010 18:39:34 +0200 Subject: [PATCH 047/139] redis-cli now supports automatically reconnection in interactive mode --- src/redis-cli.c | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 007ebcde..a2a909ba 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "anet.h" #include "sds.h" @@ -67,11 +68,14 @@ static struct config { static int cliReadReply(int fd); static void usage(); -static int cliConnect(void) { +/* Connect to the client. If force is not zero the connection is performed + * even if there is already a connected socket. */ +static int cliConnect(int force) { char err[ANET_ERR_LEN]; static int fd = ANET_ERR; - if (fd == ANET_ERR) { + if (fd == ANET_ERR || force) { + if (force) close(fd); fd = anetTcpConnect(err,config.hostip,config.hostport); if (fd == ANET_ERR) { fprintf(stderr, "Could not connect to Redis at %s:%d: %s", config.hostip, config.hostport, err); @@ -191,10 +195,18 @@ static int cliReadMultiBulkReply(int fd) { static int cliReadReply(int fd) { char type; + int nread; - if (anetRead(fd,&type,1) <= 0) { + if ((nread = anetRead(fd,&type,1)) <= 0) { if (config.shutdown) return 0; - exit(1); + if (config.interactive && + (nread == 0 || (nread == -1 && errno == ECONNRESET))) + { + return ECONNRESET; + } else { + printf("I/O error while reading from socket: %s",strerror(errno)); + exit(1); + } } switch(type) { case '-': @@ -246,7 +258,7 @@ static int cliSendCommand(int argc, char **argv, int repeat) { if (!strcasecmp(command,"monitor")) config.monitor_mode = 1; if (!strcasecmp(command,"subscribe") || !strcasecmp(command,"psubscribe")) 
config.pubsub_mode = 1; - if ((fd = cliConnect()) == -1) return 1; + if ((fd = cliConnect(0)) == -1) return 1; /* Select db number */ retval = selectDb(fd); @@ -381,9 +393,21 @@ static void repl() { if (argc > 0) { if (strcasecmp(argv[0],"quit") == 0 || strcasecmp(argv[0],"exit") == 0) - exit(0); - else - cliSendCommand(argc, argv, 1); + { + exit(0); + } else { + int err; + + if ((err = cliSendCommand(argc, argv, 1)) != 0) { + if (err == ECONNRESET) { + printf("Reconnecting... "); + fflush(stdout); + if (cliConnect(1) == -1) exit(1); + printf("OK\n"); + cliSendCommand(argc,argv,1); + } + } + } } /* Free the argument vector */ for (j = 0; j < argc; j++) @@ -431,7 +455,8 @@ int main(int argc, char **argv) { cliSendCommand(2, convertToSds(2, authargv), 1); } - if (argc == 0 || config.interactive == 1) repl(); + if (argc == 0) config.interactive = 1; + if (config.interactive) repl(); argvcopy = convertToSds(argc+1, argv); if (config.argn_from_stdin) { From 5d15b5207d3fb83b4815df596251f553b7a811d5 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Aug 2010 10:05:50 +0200 Subject: [PATCH 048/139] Re-introduce the interactive field so we can reconnect in interactive mode --- src/redis-cli.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 1bd0798b..33919758 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -56,6 +56,7 @@ static struct config { long repeat; int dbnum; int argn_from_stdin; + int interactive; int shutdown; int monitor_mode; int pubsub_mode; @@ -393,6 +394,7 @@ static void repl() { char *line; sds *argv; + config.interactive = 1; while((line = linenoise("redis> ")) != NULL) { if (line[0] != '\0') { argv = sdssplitargs(line,&argc); @@ -440,6 +442,7 @@ int main(int argc, char **argv) { config.repeat = 1; config.dbnum = 0; config.argn_from_stdin = 0; + config.interactive = 0; config.shutdown = 0; config.monitor_mode = 0; config.pubsub_mode = 0; From ae77016e572bc5ed48574c3a173c4cda27c5e0d9 Mon Sep 17 
00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Aug 2010 13:39:11 +0200 Subject: [PATCH 049/139] Add a newline to tty output after every reply --- src/redis-cli.c | 10 +++------- tests/integration/redis-cli.tcl | 2 ++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 33919758..df4c2517 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -226,7 +226,7 @@ static int cliReadReply(int fd) { case '*': return cliReadMultiBulkReply(fd); default: - printf("protocol error, got '%c' as reply type byte\n", type); + printf("protocol error, got '%c' as reply type byte", type); return 1; } } @@ -294,12 +294,8 @@ static int cliSendCommand(int argc, char **argv, int repeat) { } retval = cliReadReply(fd); - if (retval) { - return retval; - } - if (!config.raw_output && config.tty) { - printf("\n"); - } + if (!config.raw_output && config.tty) printf("\n"); + if (retval) return retval; } return 0; } diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl index c4954304..4f180daf 100644 --- a/tests/integration/redis-cli.tcl +++ b/tests/integration/redis-cli.tcl @@ -36,9 +36,11 @@ start_server {tags {"cli"}} { } proc test_interactive_cli {name code} { + set ::env(FAKETTY) 1 set fd [open_cli] test "Interactive CLI: $name" $code close_cli $fd + unset ::env(FAKETTY) } proc run_nontty_cli {args} { From f9b252613bcdaf81183a1ed2fb12d1a9feabc183 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Aug 2010 14:08:32 +0200 Subject: [PATCH 050/139] Comments in redis-cli tests --- tests/integration/redis-cli.tcl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl index 4f180daf..a0df2ea1 100644 --- a/tests/integration/redis-cli.tcl +++ b/tests/integration/redis-cli.tcl @@ -28,6 +28,7 @@ start_server {tags {"cli"}} { flush $fd } + # Helpers to run tests in interactive mode proc run_command {fd cmd} { write_cli $fd $cmd set lines [split 
[read_cli $fd] "\n"] @@ -43,6 +44,7 @@ start_server {tags {"cli"}} { unset ::env(FAKETTY) } + # Helpers to run tests where stdout is not a tty proc run_nontty_cli {args} { set fd [open [format "|src/redis-cli -p %d -n 9 $args" [srv port]] "r"] fconfigure $fd -buffering none @@ -56,6 +58,7 @@ start_server {tags {"cli"}} { test "Non-interactive non-TTY CLI: $name" $code } + # Helpers to run tests where stdout is a tty proc run_tty_cli {args} { set ::env(FAKETTY) 1 set resp [run_nontty_cli {*}$args] From f791d66e205b7449ee1829e60c2422caa5988192 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Aug 2010 14:15:41 +0200 Subject: [PATCH 051/139] Make helper functions simpler --- tests/integration/redis-cli.tcl | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl index a0df2ea1..3413e8a2 100644 --- a/tests/integration/redis-cli.tcl +++ b/tests/integration/redis-cli.tcl @@ -45,7 +45,7 @@ start_server {tags {"cli"}} { } # Helpers to run tests where stdout is not a tty - proc run_nontty_cli {args} { + proc run_cli {args} { set fd [open [format "|src/redis-cli -p %d -n 9 $args" [srv port]] "r"] fconfigure $fd -buffering none fconfigure $fd -translation binary @@ -58,16 +58,11 @@ start_server {tags {"cli"}} { test "Non-interactive non-TTY CLI: $name" $code } - # Helpers to run tests where stdout is a tty - proc run_tty_cli {args} { - set ::env(FAKETTY) 1 - set resp [run_nontty_cli {*}$args] - unset ::env(FAKETTY) - set _ $resp - } - + # Helpers to run tests where stdout is a tty (fake it) proc test_tty_cli {name code} { + set ::env(FAKETTY) 1 test "Non-interactive TTY CLI: $name" $code + unset ::env(FAKETTY) } test_interactive_cli "INFO response should be printed raw" { @@ -116,46 +111,46 @@ start_server {tags {"cli"}} { } test_tty_cli "Status reply" { - assert_equal "OK\n" [run_tty_cli set key bar] + assert_equal "OK\n" [run_cli set key bar] assert_equal 
"bar" [r get key] } test_tty_cli "Integer reply" { r del counter - assert_equal "(integer) 1\n" [run_tty_cli incr counter] + assert_equal "(integer) 1\n" [run_cli incr counter] } test_tty_cli "Bulk reply" { r set key "tab\tnewline\n" - assert_equal "\"tab\\tnewline\\n\"\n" [run_tty_cli get key] + assert_equal "\"tab\\tnewline\\n\"\n" [run_cli get key] } test_tty_cli "Multi-bulk reply" { r del list r rpush list foo r rpush list bar - assert_equal "1. \"foo\"\n2. \"bar\"\n" [run_tty_cli lrange list 0 -1] + assert_equal "1. \"foo\"\n2. \"bar\"\n" [run_cli lrange list 0 -1] } test_nontty_cli "Status reply" { - assert_equal "OK" [run_nontty_cli set key bar] + assert_equal "OK" [run_cli set key bar] assert_equal "bar" [r get key] } test_nontty_cli "Integer reply" { r del counter - assert_equal "1" [run_nontty_cli incr counter] + assert_equal "1" [run_cli incr counter] } test_nontty_cli "Bulk reply" { r set key "tab\tnewline\n" - assert_equal "tab\tnewline\n" [run_nontty_cli get key] + assert_equal "tab\tnewline\n" [run_cli get key] } test_nontty_cli "Multi-bulk reply" { r del list r rpush list foo r rpush list bar - assert_equal "foo\nbar" [run_nontty_cli lrange list 0 -1] + assert_equal "foo\nbar" [run_cli lrange list 0 -1] } } From b4b62c34dbc97b8bcb6c84e0d0cef1d27117175e Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Aug 2010 14:48:50 +0200 Subject: [PATCH 052/139] Use fstat to detect if stdin was redirected --- src/redis-cli.c | 37 ++++++++++++++-------- tests/integration/redis-cli.tcl | 56 +++++++++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 15 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index df4c2517..8b7d0777 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "anet.h" #include "sds.h" @@ -55,7 +56,6 @@ static struct config { int hostport; long repeat; int dbnum; - int argn_from_stdin; int interactive; int shutdown; int monitor_mode; @@ -334,7 +334,11 @@ 
static int parseOptions(int argc, char **argv) { "by default when redis-cli is executed without a command to execute.\n" ); } else if (!strcmp(argv[i],"-c")) { - config.argn_from_stdin = 1; + fprintf(stderr, +"Reading last argument from standard input using -c is deprecated.\n" +"When standard input is connected to a pipe or regular file, it is\n" +"automatically used as last argument.\n" + ); } else if (!strcmp(argv[i],"-v")) { printf("redis-cli shipped with Redis verison %s\n", REDIS_VERSION); exit(0); @@ -364,7 +368,7 @@ static sds readArgFromStdin(void) { static void usage() { fprintf(stderr, "usage: redis-cli [-iv] [-h host] [-p port] [-a authpw] [-r repeat_times] [-n db_num] cmd arg1 arg2 arg3 ... argN\n"); - fprintf(stderr, "usage: echo \"argN\" | redis-cli -c [-h host] [-p port] [-a authpw] [-r repeat_times] [-n db_num] cmd arg1 arg2 ... arg(N-1)\n"); + fprintf(stderr, "usage: echo \"argN\" | redis-cli [-h host] [-p port] [-a authpw] [-r repeat_times] [-n db_num] cmd arg1 arg2 ... 
arg(N-1)\n"); fprintf(stderr, "\nIf a pipe from standard input is detected this data is used as last argument.\n\n"); fprintf(stderr, "example: cat /etc/passwd | redis-cli set my_passwd\n"); fprintf(stderr, "example: redis-cli get my_passwd\n"); @@ -429,15 +433,28 @@ static void repl() { exit(0); } +static int noninteractive(int argc, char **argv) { + int retval = 0; + struct stat s; + fstat(fileno(stdin), &s); + if (S_ISFIFO(s.st_mode) || S_ISREG(s.st_mode)) { /* pipe, regular file */ + argv = zrealloc(argv, (argc+1)*sizeof(char*)); + argv[argc] = readArgFromStdin(); + retval = cliSendCommand(argc+1, argv, config.repeat); + } else { + /* stdin is probably a tty, can be tested with S_ISCHR(s.st_mode) */ + retval = cliSendCommand(argc, argv, config.repeat); + } + return retval; +} + int main(int argc, char **argv) { int firstarg; - char **argvcopy; config.hostip = "127.0.0.1"; config.hostport = 6379; config.repeat = 1; config.dbnum = 0; - config.argn_from_stdin = 0; config.interactive = 0; config.shutdown = 0; config.monitor_mode = 0; @@ -468,12 +485,6 @@ int main(int argc, char **argv) { /* Start interactive mode when no command is provided */ if (argc == 0) repl(); - - argvcopy = convertToSds(argc+1, argv); - if (config.argn_from_stdin) { - sds lastarg = readArgFromStdin(); - argvcopy[argc] = lastarg; - argc++; - } - return cliSendCommand(argc, argvcopy, config.repeat); + /* Otherwise, we have some arguments to execute */ + return noninteractive(argc,convertToSds(argc,argv)); } diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl index 3413e8a2..40e4222e 100644 --- a/tests/integration/redis-cli.tcl +++ b/tests/integration/redis-cli.tcl @@ -45,8 +45,26 @@ start_server {tags {"cli"}} { } # Helpers to run tests where stdout is not a tty - proc run_cli {args} { - set fd [open [format "|src/redis-cli -p %d -n 9 $args" [srv port]] "r"] + proc write_tmpfile {contents} { + set tmp [tmpfile "cli"] + set tmpfd [open $tmp "w"] + puts -nonewline 
$tmpfd $contents + close $tmpfd + set _ $tmp + } + + proc _run_cli {opts args} { + set cmd [format "src/redis-cli -p %d -n 9 $args" [srv port]] + foreach {key value} $opts { + if {$key eq "pipe"} { + set cmd "sh -c \"$value | $cmd\"" + } + if {$key eq "path"} { + set cmd "$cmd < $value" + } + } + + set fd [open "|$cmd" "r"] fconfigure $fd -buffering none fconfigure $fd -translation binary set resp [read $fd 1048576] @@ -54,6 +72,18 @@ start_server {tags {"cli"}} { set _ $resp } + proc run_cli {args} { + _run_cli {} {*}$args + } + + proc run_cli_with_input_pipe {cmd args} { + _run_cli [list pipe $cmd] {*}$args + } + + proc run_cli_with_input_file {path args} { + _run_cli [list path $path] {*}$args + } + proc test_nontty_cli {name code} { test "Non-interactive non-TTY CLI: $name" $code } @@ -132,6 +162,17 @@ start_server {tags {"cli"}} { assert_equal "1. \"foo\"\n2. \"bar\"\n" [run_cli lrange list 0 -1] } + test_tty_cli "Read last argument from pipe" { + assert_equal "OK\n" [run_cli_with_input_pipe "echo foo" set key] + assert_equal "foo\n" [r get key] + } + + test_tty_cli "Read last argument from file" { + set tmpfile [write_tmpfile "from file"] + assert_equal "OK\n" [run_cli_with_input_file $tmpfile set key] + assert_equal "from file" [r get key] + } + test_nontty_cli "Status reply" { assert_equal "OK" [run_cli set key bar] assert_equal "bar" [r get key] @@ -153,4 +194,15 @@ start_server {tags {"cli"}} { r rpush list bar assert_equal "foo\nbar" [run_cli lrange list 0 -1] } + + test_nontty_cli "Read last argument from pipe" { + assert_equal "OK" [run_cli_with_input_pipe "echo foo" set key] + assert_equal "foo\n" [r get key] + } + + test_nontty_cli "Read last argument from file" { + set tmpfile [write_tmpfile "from file"] + assert_equal "OK" [run_cli_with_input_file $tmpfile set key] + assert_equal "from file" [r get key] + } } From 76864d5626e6dc7d436df5c11901844a4c05740e Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 26 Aug 2010 11:06:30 +0200 Subject: 
[PATCH 053/139] Expand macro's to functions for readability --- src/intset.c | 89 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 36 deletions(-) diff --git a/src/intset.c b/src/intset.c index 2532582e..a837592d 100644 --- a/src/intset.c +++ b/src/intset.c @@ -10,21 +10,38 @@ #define INTSET_ENC_INT32 (sizeof(int32_t)) #define INTSET_ENC_INT64 (sizeof(int64_t)) -/* Accessors for each type of encoding */ -#define INTSET_VALUE_ENCODING(__val) (((__val) < INT32_MIN || (__val) > INT32_MAX) ? \ - INTSET_ENC_INT64 : (((__val) < INT16_MIN || (__val) > INT16_MAX) ? \ - INTSET_ENC_INT32 : INTSET_ENC_INT16)) -#define INTSET_GET_ENCODED(__is,__pos,__enc) ((__enc == INTSET_ENC_INT64) ? \ - ((int64_t*)(__is)->contents)[__pos] : ((__enc == INTSET_ENC_INT32) ? \ - ((int32_t*)(__is)->contents)[__pos] : ((int16_t*)(__is)->contents)[__pos])) -#define INTSET_GET(__is,__pos) (INTSET_GET_ENCODED(__is,__pos,(__is)->encoding)) -#define INTSET_SET(__is,__pos,__val) { \ - if ((__is)->encoding == INTSET_ENC_INT64) \ - ((int64_t*)(__is)->contents)[__pos] = (__val); \ - else if ((__is)->encoding == INTSET_ENC_INT32) \ - ((int32_t*)(__is)->contents)[__pos] = (__val); \ - else \ - ((int16_t*)(__is)->contents)[__pos] = (__val); } +/* Return the required encoding for the provided value. */ +static uint8_t _intsetValueEncoding(int64_t v) { + if (v < INT32_MIN || v > INT32_MAX) + return INTSET_ENC_INT64; + else if (v < INT16_MIN || v > INT16_MAX) + return INTSET_ENC_INT32; + return INTSET_ENC_INT16; +} + +/* Return the value at pos, given an encoding. */ +static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) { + if (enc == INTSET_ENC_INT64) + return ((int64_t*)is->contents)[pos]; + else if (enc == INTSET_ENC_INT32) + return ((int32_t*)is->contents)[pos]; + return ((int16_t*)is->contents)[pos]; +} + +/* Return the value at pos, using the configured encoding. 
*/ +static int64_t _intsetGet(intset *is, int pos) { + return _intsetGetEncoded(is,pos,is->encoding); +} + +/* Set the value at pos, using the configured encoding. */ +static void _intsetSet(intset *is, int pos, int64_t value) { + if (is->encoding == INTSET_ENC_INT64) + ((int64_t*)is->contents)[pos] = value; + else if (is->encoding == INTSET_ENC_INT32) + ((int32_t*)is->contents)[pos] = value; + else + ((int16_t*)is->contents)[pos] = value; +} /* Create an empty intset. */ intset *intsetNew(void) { @@ -51,7 +68,7 @@ static intset *intsetUpgrade(intset *is, uint8_t newenc, uint8_t extra, uint8_t /* Upgrade back-to-front so we don't overwrite values */ while(length--) - INTSET_SET(is,length+offset,INTSET_GET_ENCODED(is,length,curenc)); + _intsetSet(is,length+offset,_intsetGetEncoded(is,length,curenc)); return is; } @@ -70,10 +87,10 @@ static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) { } else { /* Check for the case where we know we cannot find the value, * but do know the insert position. 
*/ - if (value > INTSET_GET(is,is->length-1)) { + if (value > _intsetGet(is,is->length-1)) { if (pos) *pos = is->length; return 0; - } else if (value < INTSET_GET(is,0)) { + } else if (value < _intsetGet(is,0)) { if (pos) *pos = 0; return 0; } @@ -81,7 +98,7 @@ static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) { while(max >= min) { mid = (min+max)/2; - cur = INTSET_GET(is,mid); + cur = _intsetGet(is,mid); if (value > cur) { min = mid+1; } else if (value < cur) { @@ -121,7 +138,7 @@ static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) { /* Insert an integer in the intset */ intset *intsetAdd(intset *is, int64_t value, uint8_t *success) { - uint8_t valenc = INTSET_VALUE_ENCODING(value); + uint8_t valenc = _intsetValueEncoding(value); uint32_t pos, offset; if (success) *success = 1; @@ -145,14 +162,14 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success) { if (pos < is->length) intsetMoveTail(is,pos,pos+1); } - INTSET_SET(is,pos,value); + _intsetSet(is,pos,value); is->length++; return is; } /* Delete integer from intset */ intset *intsetRemove(intset *is, int64_t value, uint8_t *success) { - uint8_t valenc = INTSET_VALUE_ENCODING(value); + uint8_t valenc = _intsetValueEncoding(value); uint32_t pos; if (success) *success = 0; @@ -170,20 +187,20 @@ intset *intsetRemove(intset *is, int64_t value, uint8_t *success) { /* Determine whether a value belongs to this set */ uint8_t intsetFind(intset *is, int64_t value) { - uint8_t valenc = INTSET_VALUE_ENCODING(value); + uint8_t valenc = _intsetValueEncoding(value); return valenc <= is->encoding && intsetSearch(is,value,NULL); } /* Return random member */ int64_t intsetRandom(intset *is) { - return INTSET_GET(is,rand()%is->length); + return _intsetGet(is,rand()%is->length); } /* Sets the value to the value at the given position. When this position is * out of range the function returns 0, when in range it returns 1. 
*/ uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) { if (pos < is->length) { - *value = INTSET_GET(is,pos); + *value = _intsetGet(is,pos); return 1; } return 0; @@ -200,7 +217,7 @@ uint32_t intsetLen(intset *is) { void intsetRepr(intset *is) { int i; for (i = 0; i < is->length; i++) { - printf("%lld\n", (uint64_t)INTSET_GET(is,i)); + printf("%lld\n", (uint64_t)_intsetGet(is,i)); } printf("\n"); } @@ -266,16 +283,16 @@ int main(int argc, char **argv) { sranddev(); printf("Value encodings: "); { - assert(INTSET_VALUE_ENCODING(-32768) == INTSET_ENC_INT16); - assert(INTSET_VALUE_ENCODING(+32767) == INTSET_ENC_INT16); - assert(INTSET_VALUE_ENCODING(-32769) == INTSET_ENC_INT32); - assert(INTSET_VALUE_ENCODING(+32768) == INTSET_ENC_INT32); - assert(INTSET_VALUE_ENCODING(-2147483648) == INTSET_ENC_INT32); - assert(INTSET_VALUE_ENCODING(+2147483647) == INTSET_ENC_INT32); - assert(INTSET_VALUE_ENCODING(-2147483649) == INTSET_ENC_INT64); - assert(INTSET_VALUE_ENCODING(+2147483648) == INTSET_ENC_INT64); - assert(INTSET_VALUE_ENCODING(-9223372036854775808ull) == INTSET_ENC_INT64); - assert(INTSET_VALUE_ENCODING(+9223372036854775807ull) == INTSET_ENC_INT64); + assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16); + assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16); + assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32); + assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32); + assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32); + assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32); + assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64); + assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64); + assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64); + assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64); ok(); } From f9d5c4e33c8b03d20bd9e4ec145792c000a7210f Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 26 Aug 2010 11:22:58 +0200 Subject: [PATCH 054/139] Make 
the function intsetUpgrade self-contained --- src/intset.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/src/intset.c b/src/intset.c index a837592d..2b082b9e 100644 --- a/src/intset.c +++ b/src/intset.c @@ -58,20 +58,6 @@ static intset *intsetResize(intset *is, uint32_t len) { return is; } -static intset *intsetUpgrade(intset *is, uint8_t newenc, uint8_t extra, uint8_t offset) { - uint8_t curenc = is->encoding; - int length = is->length; - - /* First set new encoding and resize */ - is->encoding = newenc; - is = intsetResize(is,is->length+extra); - - /* Upgrade back-to-front so we don't overwrite values */ - while(length--) - _intsetSet(is,length+offset,_intsetGetEncoded(is,length,curenc)); - return is; -} - /* Search for the position of "value". Return 1 when the value was found and * sets "pos" to the position of the value within the intset. Return 0 when * the value is not present in the intset and sets "pos" to the position @@ -117,6 +103,32 @@ static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) { } } +/* Upgrades the intset to a larger encoding and inserts the given integer. */ +static intset *intsetUpgradeAndAdd(intset *is, int64_t value) { + uint8_t curenc = is->encoding; + uint8_t newenc = _intsetValueEncoding(value); + int length = is->length; + int prepend = value < 0 ? 1 : 0; + + /* First set new encoding and resize */ + is->encoding = newenc; + is = intsetResize(is,is->length+1); + + /* Upgrade back-to-front so we don't overwrite values. + * Note that the "prepend" variable is used to make sure we have an empty + * space at either the beginning or the end of the intset. */ + while(length--) + _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc)); + + /* Set the value at the beginning or the end. 
*/ + if (prepend) + _intsetSet(is,0,value); + else + _intsetSet(is,is->length,value); + is->length++; + return is; +} + static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) { void *src, *dst; uint32_t bytes = is->length-from; @@ -146,9 +158,8 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success) { * this value should be either appended (if > 0) or prepended (if < 0), * because it lies outside the range of existing values. */ if (valenc > is->encoding) { - offset = value < 0 ? 1 : 0; - is = intsetUpgrade(is,valenc,1,offset); - pos = (value < 0) ? 0 : is->length; + /* This always succeeds, so we don't need to curry *success. */ + return intsetUpgradeAndAdd(is,value); } else { /* Abort if the value is already present in the set. * This call will populate "pos" with the right position to insert From 674492bcebbfd59612ad65d7429b3db5735bd447 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 12:10:16 +0200 Subject: [PATCH 055/139] removed a duplicated ERRNO checking that is useless at all --- src/object.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/object.c b/src/object.c index dae7f97b..92af1d6a 100644 --- a/src/object.c +++ b/src/object.c @@ -375,7 +375,6 @@ int getLongLongFromObject(robj *o, long long *target) { redisAssert(o->type == REDIS_STRING); if (o->encoding == REDIS_ENCODING_RAW) { value = strtoll(o->ptr, &eptr, 10); - if (errno == ERANGE) return REDIS_ERR; if (eptr[0] != '\0') return REDIS_ERR; if (errno == ERANGE && (value == LLONG_MIN || value == LLONG_MAX)) return REDIS_ERR; From 740eee1cc6fb65a094e5f17e85aa37fdaa24f2e9 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 26 Aug 2010 12:13:51 +0200 Subject: [PATCH 056/139] Fix type that was not renamed and compiler warning --- src/intset.c | 2 +- src/t_set.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/intset.c b/src/intset.c index 2b082b9e..2f359b7f 100644 --- a/src/intset.c +++ b/src/intset.c @@ -151,7 +151,7 @@ static void 
intsetMoveTail(intset *is, uint32_t from, uint32_t to) { /* Insert an integer in the intset */ intset *intsetAdd(intset *is, int64_t value, uint8_t *success) { uint8_t valenc = _intsetValueEncoding(value); - uint32_t pos, offset; + uint32_t pos; if (success) *success = 1; /* Upgrade encoding if necessary. If we need to upgrade, we know that diff --git a/src/t_set.c b/src/t_set.c index 01c851ba..cb06a6a2 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -81,7 +81,7 @@ int setTypeIsMember(robj *subject, robj *value) { } setTypeIterator *setTypeInitIterator(robj *subject) { - setTypeIterator *si = zmalloc(sizeof(setIterator)); + setTypeIterator *si = zmalloc(sizeof(setTypeIterator)); si->subject = subject; si->encoding = subject->encoding; if (si->encoding == REDIS_ENCODING_HT) { From 5f19e8a4a5183255a38c31ea88ee6b72e96eca66 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 12:28:53 +0200 Subject: [PATCH 057/139] computeObjectSwappability is now able to compute swappability for intset encoded sets --- src/vm.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/vm.c b/src/vm.c index 0ccc5fe2..a3438752 100644 --- a/src/vm.c +++ b/src/vm.c @@ -395,15 +395,20 @@ double computeObjectSwappability(robj *o) { z = (o->type == REDIS_ZSET); d = z ? ((zset*)o->ptr)->dict : o->ptr; - asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); - if (z) asize += sizeof(zset)-sizeof(dict); - if (dictSize(d)) { - de = dictGetRandomKey(d); - ele = dictGetEntryKey(de); - elesize = (ele->encoding == REDIS_ENCODING_RAW) ? 
- (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o); - asize += (sizeof(struct dictEntry)+elesize)*dictSize(d); - if (z) asize += sizeof(zskiplistNode)*dictSize(d); + if (!z && o->encoding == REDIS_ENCODING_INTSET) { + intset *is = o->ptr; + asize = sizeof(*is)+is->encoding*is->length; + } else { + asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); + if (z) asize += sizeof(zset)-sizeof(dict); + if (dictSize(d)) { + de = dictGetRandomKey(d); + ele = dictGetEntryKey(de); + elesize = (ele->encoding == REDIS_ENCODING_RAW) ? + (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o); + asize += (sizeof(struct dictEntry)+elesize)*dictSize(d); + if (z) asize += sizeof(zskiplistNode)*dictSize(d); + } } break; case REDIS_HASH: From bad7d097e95ec0735c41f85b50ace65bec6134ae Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 13:18:24 +0200 Subject: [PATCH 058/139] fixed a memory leak in the new Set code --- src/rdb.c | 2 ++ tests/test_helper.tcl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rdb.c b/src/rdb.c index 019aa9a0..12221b9f 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -714,6 +714,8 @@ robj *rdbLoadObject(int type, FILE *fp) { * to regular hashtable encoded set */ if (o->encoding == REDIS_ENCODING_HT) { dictAdd((dict*)o->ptr,ele,NULL); + } else { + decrRefCount(ele); } } } else if (type == REDIS_ZSET) { diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 4ae9cc65..d4a0aa14 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -11,7 +11,7 @@ source tests/support/util.tcl set ::host 127.0.0.1 set ::port 16379 -set ::traceleaks 0 +set ::traceleaks 1 set ::valgrind 0 set ::denytags {} set ::allowtags {} From 2929ca9786e0dd64c656bbd212c35c137ab54fda Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 26 Aug 2010 13:18:19 +0200 Subject: [PATCH 059/139] Fix parenthesis error on decrementing *argc --- src/sds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sds.c b/src/sds.c index 
d7d23c45..a0ebb059 100644 --- a/src/sds.c +++ b/src/sds.c @@ -465,7 +465,7 @@ sds *sdssplitargs(char *line, int *argc) { } err: - while(*argc--) + while((*argc)--) sdsfree(vector[*argc]); zfree(vector); if (current) sdsfree(current); From e59a64b8d37c4d051234934ad74f70c4ac625e95 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 13:18:56 +0200 Subject: [PATCH 060/139] forgot the traceleaks var in tests set to 1, reverted to the default, 0 --- tests/test_helper.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index d4a0aa14..4ae9cc65 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -11,7 +11,7 @@ source tests/support/util.tcl set ::host 127.0.0.1 set ::port 16379 -set ::traceleaks 1 +set ::traceleaks 0 set ::valgrind 0 set ::denytags {} set ::allowtags {} From 94364d53b4746e8cd9e3da633162cb1e34f0bdb6 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 26 Aug 2010 14:05:14 +0200 Subject: [PATCH 061/139] Verify that the blocking pop timeout value is a non-negative integer --- src/t_list.c | 14 +++++++++++++- tests/unit/type/list.tcl | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/t_list.c b/src/t_list.c index 2a981033..43d292b6 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -782,9 +782,20 @@ int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) { /* Blocking RPOP/LPOP */ void blockingPopGenericCommand(redisClient *c, int where) { robj *o; + long long lltimeout; time_t timeout; int j; + /* Make sure timeout is an integer value */ + if (getLongLongFromObjectOrReply(c,c->argv[c->argc-1],&lltimeout, + "timeout is not an integer") != REDIS_OK) return; + + /* Make sure the timeout is not negative */ + if (lltimeout < 0) { + addReplySds(c,sdsnew("-ERR timeout is negative\r\n")); + return; + } + for (j = 1; j < c->argc-1; j++) { o = lookupKeyWrite(c->db,c->argv[j]); if (o != NULL) { @@ -823,8 +834,9 @@ void 
blockingPopGenericCommand(redisClient *c, int where) { } } } + /* If the list is empty or the key does not exists we must block */ - timeout = strtol(c->argv[c->argc-1]->ptr,NULL,10); + timeout = lltimeout; if (timeout > 0) timeout += time(NULL); blockForKeys(c,c->argv+1,c->argc-2,timeout); } diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl index d3ed90ec..1a85c809 100644 --- a/tests/unit/type/list.tcl +++ b/tests/unit/type/list.tcl @@ -139,6 +139,28 @@ start_server { assert_equal 0 [r exists blist1] } + test "$pop: with negative timeout" { + set rd [redis_deferring_client] + $rd $pop blist1 -1 + assert_error "ERR*is negative*" {$rd read} + } + + test "$pop: with non-integer timeout" { + set rd [redis_deferring_client] + $rd $pop blist1 1.1 + assert_error "ERR*not an integer*" {$rd read} + } + + test "$pop: with zero timeout should block indefinitely" { + # To test this, use a timeout of 0 and wait a second. + # The blocking pop should still be waiting for a push. + set rd [redis_deferring_client] + $rd $pop blist1 0 + after 1000 + r rpush blist1 foo + assert_equal {blist1 foo} [$rd read] + } + test "$pop: second argument is not a list" { set rd [redis_deferring_client] r del blist1 blist2 From e4ecc931192bee19d0365d2efe0e12b1be27dc9a Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 16:58:02 +0200 Subject: [PATCH 062/139] Version is now 2.1.3 --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index cac59721..b570fe04 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define REDIS_VERSION "2.1.2" +#define REDIS_VERSION "2.1.3" From 452ccf7a415fd748cb0b3d00bef8062455c3c675 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 17:29:13 +0200 Subject: [PATCH 063/139] SORT stress testing against bigger aggregate values --- tests/unit/sort.tcl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/sort.tcl b/tests/unit/sort.tcl index 
bca01737..dcc471fb 100644 --- a/tests/unit/sort.tcl +++ b/tests/unit/sort.tcl @@ -37,9 +37,11 @@ start_server { foreach {num cmd enc title} { 16 lpush ziplist "Ziplist" - 64 lpush linkedlist "Linked list" + 1000 lpush linkedlist "Linked list" + 10000 lpush linkedlist "Big Linked list" 16 sadd intset "Intset" - 64 sadd hashtable "Hash table" + 1000 sadd hashtable "Hash table" + 10000 sadd hashtable "Big Hash table" } { set result [create_random_dataset $num $cmd] assert_encoding $enc tosort From 588cd980e93a1a9f6afae85ed52ec74ff3d811e1 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 17:52:02 +0200 Subject: [PATCH 064/139] redis-cli tests commented out since there is a problem with the CI server and these tests, will be activated again once the problem is fixed --- tests/test_helper.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 4ae9cc65..ee7fa3e1 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -95,7 +95,7 @@ proc main {} { execute_tests "unit/cas" execute_tests "integration/replication" execute_tests "integration/aof" - execute_tests "integration/redis-cli" +# execute_tests "integration/redis-cli" execute_tests "unit/pubsub" # run tests with VM enabled From 23c64fe50ddbc01f825ebe64f1a8b5f14c584327 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 18:11:26 +0200 Subject: [PATCH 065/139] translated a few long long into int64_t for correctness and to avoid compilation warnings as well --- src/aof.c | 2 +- src/rdb.c | 2 +- src/t_set.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/aof.c b/src/aof.c index dc806969..8f2dc96f 100644 --- a/src/aof.c +++ b/src/aof.c @@ -468,7 +468,7 @@ int rewriteAppendOnlyFile(char *filename) { /* Emit the SADDs needed to rebuild the set */ if (o->encoding == REDIS_ENCODING_INTSET) { int ii = 0; - long long llval; + int64_t llval; while(intsetGet(o->ptr,ii++,&llval)) { if 
(fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr; if (fwriteBulkObject(fp,&key) == 0) goto werr; diff --git a/src/rdb.c b/src/rdb.c index 12221b9f..96c5a09d 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -273,7 +273,7 @@ int rdbSaveObject(FILE *fp, robj *o) { dictReleaseIterator(di); } else if (o->encoding == REDIS_ENCODING_INTSET) { intset *is = o->ptr; - long long llval; + int64_t llval; int i = 0; if (rdbSaveLen(fp,intsetLen(is)) == -1) return -1; diff --git a/src/t_set.c b/src/t_set.c index cb06a6a2..97fc5bf4 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -112,7 +112,7 @@ robj *setTypeNext(setTypeIterator *si) { incrRefCount(ret); } } else if (si->encoding == REDIS_ENCODING_INTSET) { - long long llval; + int64_t llval; if (intsetGet(si->subject->ptr,si->ii++,&llval)) ret = createStringObjectFromLongLong(llval); } From ec7e138926b7b587adc247e8c64da6d3b1706434 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 18:47:03 +0200 Subject: [PATCH 066/139] test for intset integer encodability test and some small refactoring --- src/redis.h | 2 ++ src/t_set.c | 8 ++++---- src/util.c | 34 +++++++++++++++++++++++++++------- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/redis.h b/src/redis.h index 6156a6ca..a4fdcb19 100644 --- a/src/redis.h +++ b/src/redis.h @@ -769,6 +769,8 @@ int stringmatch(const char *pattern, const char *string, int nocase); long long memtoll(const char *p, int *err); int ll2string(char *s, size_t len, long long value); int isStringRepresentableAsLong(sds s, long *longval); +int isStringRepresentableAsLongLong(sds s, long long *longval); +int isObjectRepresentableAsLongLong(robj *o, long long *llongval); /* Configuration */ void loadServerConfig(char *filename); diff --git a/src/t_set.c b/src/t_set.c index 97fc5bf4..68e13227 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -8,7 +8,7 @@ * an integer-encodable value, an intset will be returned. Otherwise a regular * hash table. 
*/ robj *setTypeCreate(robj *value) { - if (getLongLongFromObject(value,NULL) == REDIS_OK) + if (isObjectRepresentableAsLongLong(value,NULL) == REDIS_OK) return createIntsetObject(); return createSetObject(); } @@ -21,7 +21,7 @@ int setTypeAdd(robj *subject, robj *value) { return 1; } } else if (subject->encoding == REDIS_ENCODING_INTSET) { - if (getLongLongFromObject(value,&llval) == REDIS_OK) { + if (isObjectRepresentableAsLongLong(value,&llval) == REDIS_OK) { uint8_t success = 0; subject->ptr = intsetAdd(subject->ptr,llval,&success); if (success) { @@ -55,7 +55,7 @@ int setTypeRemove(robj *subject, robj *value) { return 1; } } else if (subject->encoding == REDIS_ENCODING_INTSET) { - if (getLongLongFromObject(value,&llval) == REDIS_OK) { + if (isObjectRepresentableAsLongLong(value,&llval) == REDIS_OK) { uint8_t success; subject->ptr = intsetRemove(subject->ptr,llval,&success); if (success) return 1; @@ -71,7 +71,7 @@ int setTypeIsMember(robj *subject, robj *value) { if (subject->encoding == REDIS_ENCODING_HT) { return dictFind((dict*)subject->ptr,value) != NULL; } else if (subject->encoding == REDIS_ENCODING_INTSET) { - if (getLongLongFromObject(value,&llval) == REDIS_OK) { + if (isObjectRepresentableAsLongLong(value,&llval) == REDIS_OK) { return intsetFind((intset*)subject->ptr,llval); } } else { diff --git a/src/util.c b/src/util.c index cc2794f6..e304ff83 100644 --- a/src/util.c +++ b/src/util.c @@ -200,24 +200,44 @@ int ll2string(char *s, size_t len, long long value) { return l; } -/* Check if the nul-terminated string 's' can be represented by a long +/* Check if the sds string 's' can be represented by a long long * (that is, is a number that fits into long without any other space or - * character before or after the digits). + * character before or after the digits, so that converting this number + * back to a string will result in the same bytes as the original string). 
* - * If so, the function returns REDIS_OK and *longval is set to the value + * If so, the function returns REDIS_OK and *llongval is set to the value * of the number. Otherwise REDIS_ERR is returned */ -int isStringRepresentableAsLong(sds s, long *longval) { +int isStringRepresentableAsLongLong(sds s, long long *llongval) { char buf[32], *endptr; - long value; + long long value; int slen; - value = strtol(s, &endptr, 10); + value = strtoll(s, &endptr, 10); if (endptr[0] != '\0') return REDIS_ERR; slen = ll2string(buf,32,value); /* If the number converted back into a string is not identical * then it's not possible to encode the string as integer */ if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR; - if (longval) *longval = value; + if (llongval) *llongval = value; return REDIS_OK; } + +int isStringRepresentableAsLong(sds s, long *longval) { + long long ll; + + if (isStringRepresentableAsLongLong(s,&ll) == REDIS_ERR) return REDIS_ERR; + if (ll < LONG_MIN || ll > LONG_MAX) return REDIS_ERR; + *longval = (long)ll; + return REDIS_OK; +} + +int isObjectRepresentableAsLongLong(robj *o, long long *llongval) { + redisAssert(o->type == REDIS_STRING); + if (o->encoding == REDIS_ENCODING_INT) { + if (llongval) *llongval = (long) o->ptr; + return REDIS_OK; + } else { + return isStringRepresentableAsLongLong(o->ptr,llongval); + } +} From 2df84b726983ae1f4fc9c16da61d5a0ce5114cc4 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 26 Aug 2010 19:10:40 +0200 Subject: [PATCH 067/139] intset loading bug fixed --- src/rdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rdb.c b/src/rdb.c index 96c5a09d..d8de5f76 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -702,7 +702,7 @@ robj *rdbLoadObject(int type, FILE *fp) { if (o->encoding == REDIS_ENCODING_INTSET) { /* Fetch integer value from element */ - if (getLongLongFromObject(ele,&llval) == REDIS_OK) { + if (isObjectRepresentableAsLongLong(ele,&llval) == REDIS_OK) { o->ptr = 
intsetAdd(o->ptr,llval,NULL); } else { setTypeConvert(o,REDIS_ENCODING_HT); From 357d36733dc37d87160ca2f65013656693290e9d Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 27 Aug 2010 11:01:03 +0200 Subject: [PATCH 068/139] Fixed segfault in freeMemoryIfNeeded due to the fact that keys are now sds strings and not objects in the main hash table, thanks to Anthony Lauzon for spotting the bug and providing a patch. --- src/redis.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index 9fbd52f2..77e67c58 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1321,7 +1321,8 @@ void freeMemoryIfNeeded(void) { if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue; for (j = 0; j < server.dbnum; j++) { int minttl = -1; - robj *minkey = NULL; + sds minkey = NULL; + robj *keyobj = NULL; struct dictEntry *de; if (dictSize(server.db[j].expires)) { @@ -1338,7 +1339,9 @@ void freeMemoryIfNeeded(void) { minttl = t; } } - dbDelete(server.db+j,minkey); + keyobj = createStringObject(minkey,sdslen(minkey)); + dbDelete(server.db+j,keyobj); + decrRefCount(keyobj); } } if (!freed) return; /* nothing to free... 
*/ From 09252fc4f36e9a5b89c9a9ff8b3c9750a3144b45 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 27 Aug 2010 12:46:10 +0200 Subject: [PATCH 069/139] Fixed another instance of the Issue 173 --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index 363ce54a..c2846088 100644 --- a/src/replication.c +++ b/src/replication.c @@ -138,7 +138,7 @@ int syncRead(int fd, char *ptr, ssize_t size, int timeout) { while(size) { if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) { nread = read(fd,ptr,size); - if (nread == -1) return -1; + if (nread <= 0) return -1; ptr += nread; size -= nread; totread += nread; From c1ae36aea814e1bcb0f046a00b51ed46d7432c3b Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 27 Aug 2010 17:04:26 +0200 Subject: [PATCH 070/139] Fix for bug 312, yet to verify in a couple of minutes... --- src/vm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/vm.c b/src/vm.c index a3438752..6c0d79b0 100644 --- a/src/vm.c +++ b/src/vm.c @@ -548,7 +548,15 @@ void freeIOJob(iojob *j) { /* Every time a thread finished a Job, it writes a byte into the write side * of an unix pipe in order to "awake" the main thread, and this function - * is called. */ + * is called. + * + * Note that this is called both by the event loop, when a I/O thread + * sends a byte in the notification pipe, and is also directly called from + * waitEmptyIOJobsQueue(). + * + * In the latter case we don't want to swap more, so we use the + * "privdata" argument setting it to a not NULL value to signal this + * condition. */ void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask) { @@ -558,6 +566,8 @@ void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, REDIS_NOTUSED(mask); REDIS_NOTUSED(privdata); + if (privdata == NULL) trytoswap = 0; /* check the comments above... 
*/ + /* For every byte we read in the read side of the pipe, there is one * I/O job completed to process. */ while((retval = read(fd,buf,1)) == 1) { @@ -869,7 +879,8 @@ void waitEmptyIOJobsQueue(void) { io_processed_len = listLength(server.io_processed); unlockThreadedIO(); if (io_processed_len) { - vmThreadedIOCompletedJob(NULL,server.io_ready_pipe_read,NULL,0); + vmThreadedIOCompletedJob(NULL,server.io_ready_pipe_read, + (void*)0xdeadbeef,0); usleep(1000); /* 1 millisecond */ } else { usleep(10000); /* 10 milliseconds */ From e5f257c2b2f7be0d58ebc0cf791d549c647cab6b Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 27 Aug 2010 17:06:36 +0200 Subject: [PATCH 071/139] fix for the prev fix --- src/vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vm.c b/src/vm.c index 6c0d79b0..50fb326d 100644 --- a/src/vm.c +++ b/src/vm.c @@ -566,7 +566,7 @@ void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, REDIS_NOTUSED(mask); REDIS_NOTUSED(privdata); - if (privdata == NULL) trytoswap = 0; /* check the comments above... */ + if (privdata != NULL) trytoswap = 0; /* check the comments above... */ /* For every byte we read in the read side of the pipe, there is one * I/O job completed to process. */ From 2f6b31c3bb95443991513e496d8d19656c4a80b5 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Aug 2010 10:32:32 +0200 Subject: [PATCH 072/139] Fix for a race in BGSAVE that may result in some data not being saved as soon as possible (when the configured saving triggers should fire). Also known as Issue 313, more details there in the google code issue. 
--- src/rdb.c | 3 ++- src/redis.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rdb.c b/src/rdb.c index d8de5f76..c15fc6f2 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -457,6 +457,7 @@ int rdbSaveBackground(char *filename) { if (server.bgsavechildpid != -1) return REDIS_ERR; if (server.vm_enabled) waitEmptyIOJobsQueue(); + server.dirty_before_bgsave = server.dirty; if ((childpid = fork()) == 0) { /* Child */ if (server.vm_enabled) vmReopenSwapFile(); @@ -913,7 +914,7 @@ void backgroundSaveDoneHandler(int statloc) { if (!bysignal && exitcode == 0) { redisLog(REDIS_NOTICE, "Background saving terminated with success"); - server.dirty = 0; + server.dirty = server.dirty - server.dirty_before_bgsave; server.lastsave = time(NULL); } else if (!bysignal && exitcode != 0) { redisLog(REDIS_WARNING, "Background saving error"); diff --git a/src/redis.h b/src/redis.h index a4fdcb19..9e27d724 100644 --- a/src/redis.h +++ b/src/redis.h @@ -335,6 +335,7 @@ struct redisServer { int fd; redisDb *db; long long dirty; /* changes to DB from the last save */ + long long dirty_before_bgsave; /* used to restore dirty on failed BGSAVE */ list *clients; list *slaves, *monitors; char neterr[ANET_ERR_LEN]; From 36babc1e31f434e95fc49a6a1f611a75b3827ade Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 30 Aug 2010 11:14:54 +0200 Subject: [PATCH 073/139] Refactor reply parsing code in redis-benchmark for efficiency --- src/redis-benchmark.c | 162 +++++++++++++++++++++++------------------- 1 file changed, 90 insertions(+), 72 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 123d8118..ceeab2b9 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -206,16 +206,27 @@ static void clientDone(client c) { } } +/* Read a length from the buffer pointed to by *p, store the length in *len, + * and return the number of bytes that the cursor advanced. 
*/ +static int readLen(char *p, int *len) { + char *tail = strstr(p,"\r\n"); + if (tail == NULL) + return 0; + *tail = '\0'; + *len = atoi(p+1); + return tail+2-p; +} + static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { - char buf[1024]; - int nread; + char buf[1024], *p; + int nread, pos=0, len=0; client c = privdata; REDIS_NOTUSED(el); REDIS_NOTUSED(fd); REDIS_NOTUSED(mask); - nread = read(c->fd, buf, 1024); + nread = read(c->fd,buf,sizeof(buf)); if (nread == -1) { fprintf(stderr, "Reading from socket: %s\n", strerror(errno)); freeClient(c); @@ -228,82 +239,89 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) } c->totreceived += nread; c->ibuf = sdscatlen(c->ibuf,buf,nread); + len = sdslen(c->ibuf); -processdata: - /* Are we waiting for the first line of the command of for sdf - * count in bulk or multi bulk operations? */ if (c->replytype == REPLY_INT || - c->replytype == REPLY_RETCODE || - (c->replytype == REPLY_BULK && c->readlen == -1) || - (c->replytype == REPLY_MBULK && c->readlen == -1) || - (c->replytype == REPLY_MBULK && c->mbulk == -1)) { - char *p; - - /* Check if the first line is complete. This is only true if - * there is a newline inside the buffer. */ - if ((p = strchr(c->ibuf,'\n')) != NULL) { - if (c->replytype == REPLY_BULK || - (c->replytype == REPLY_MBULK && c->mbulk != -1)) - { - /* Read the count of a bulk reply (being it a single bulk or - * a multi bulk reply). "$" for the protocol spec. */ - *p = '\0'; - *(p-1) = '\0'; - c->readlen = atoi(c->ibuf+1)+2; - // printf("BULK ATOI: %s\n", c->ibuf+1); - /* Handle null bulk reply "$-1" */ - if (c->readlen-2 == -1) { - clientDone(c); - return; - } - /* Leave all the rest in the input buffer */ - c->ibuf = sdsrange(c->ibuf,(p-c->ibuf)+1,-1); - /* fall through to reach the point where the code will try - * to check if the bulk reply is complete. 
*/ - } else if (c->replytype == REPLY_MBULK && c->mbulk == -1) { - /* Read the count of a multi bulk reply. That is, how many - * bulk replies we have to read next. "*" protocol. */ - *p = '\0'; - *(p-1) = '\0'; - c->mbulk = atoi(c->ibuf+1); - /* Handle null bulk reply "*-1" */ - if (c->mbulk == -1) { - clientDone(c); - return; - } - // printf("%p) %d elements list\n", c, c->mbulk); - /* Leave all the rest in the input buffer */ - c->ibuf = sdsrange(c->ibuf,(p-c->ibuf)+1,-1); - goto processdata; - } else { - c->ibuf = sdstrim(c->ibuf,"\r\n"); - clientDone(c); - return; - } - } - } - /* bulk read, did we read everything? */ - if (((c->replytype == REPLY_MBULK && c->mbulk != -1) || - (c->replytype == REPLY_BULK)) && c->readlen != -1 && - (unsigned)c->readlen <= sdslen(c->ibuf)) + c->replytype == REPLY_RETCODE) { - // printf("BULKSTATUS mbulk:%d readlen:%d sdslen:%d\n", - // c->mbulk,c->readlen,sdslen(c->ibuf)); - if (c->replytype == REPLY_BULK) { - clientDone(c); - } else if (c->replytype == REPLY_MBULK) { - // printf("%p) %d (%d)) ",c, c->mbulk, c->readlen); - // fwrite(c->ibuf,c->readlen,1,stdout); - // printf("\n"); - if (--c->mbulk == 0) { - clientDone(c); + /* Check if the first line is complete. This is everything we need + * when waiting for an integer or status code reply.*/ + if ((p = strstr(c->ibuf,"\r\n")) != NULL) + goto done; + } else if (c->replytype == REPLY_BULK) { + int advance = 0; + if (c->readlen < 0) { + advance = readLen(c->ibuf+pos,&c->readlen); + if (advance) { + pos += advance; + if (c->readlen == -1) { + goto done; + } else { + /* include the trailing \r\n */ + c->readlen += 2; + } } else { - c->ibuf = sdsrange(c->ibuf,c->readlen,-1); - c->readlen = -1; - goto processdata; + goto skip; } } + + int canconsume; + if (c->readlen > 0) { + canconsume = c->readlen > (len-pos) ? 
(len-pos) : c->readlen; + c->readlen -= canconsume; + pos += canconsume; + } + + if (c->readlen == 0) + goto done; + } else if (c->replytype == REPLY_MBULK) { + int advance = 0; + if (c->mbulk == -1) { + advance = readLen(c->ibuf+pos,&c->mbulk); + if (advance) { + pos += advance; + if (c->mbulk == -1) + goto done; + } else { + goto skip; + } + } + + int canconsume; + while(c->mbulk > 0 && pos < len) { + if (c->readlen > 0) { + canconsume = c->readlen > (len-pos) ? (len-pos) : c->readlen; + c->readlen -= canconsume; + pos += canconsume; + if (c->readlen == 0) + c->mbulk--; + } else { + advance = readLen(c->ibuf+pos,&c->readlen); + if (advance) { + pos += advance; + if (c->readlen == -1) { + c->mbulk--; + continue; + } else { + /* include the trailing \r\n */ + c->readlen += 2; + } + } else { + goto skip; + } + } + } + + if (c->mbulk == 0) + goto done; } + +skip: + c->ibuf = sdsrange(c->ibuf,pos,-1); + return; +done: + clientDone(c); + return; } static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) From ed0dd55402710d5bb21ef66d81a7dff694737c22 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 30 Aug 2010 11:25:02 +0200 Subject: [PATCH 074/139] Show the current throughput while benchmarking --- src/redis-benchmark.c | 86 +++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index ceeab2b9..297ecc6c 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -75,6 +75,7 @@ static struct config { long long start; long long totlatency; int *latency; + char *title; list *clients; int quiet; int loop; @@ -389,13 +390,13 @@ static void createMissingClients(client c) { } } -static void showLatencyReport(char *title) { +static void showLatencyReport(void) { int j, seen = 0; float perc, reqpersec; reqpersec = (float)config.donerequests/((float)config.totlatency/1000); if (!config.quiet) { - printf("====== %s ======\n", title); + printf("====== 
%s ======\n", config.title); printf(" %d requests completed in %.2f seconds\n", config.donerequests, (float)config.totlatency/1000); printf(" %d parallel clients\n", config.numclients); @@ -411,20 +412,20 @@ static void showLatencyReport(char *title) { } printf("%.2f requests per second\n\n", reqpersec); } else { - printf("%s: %.2f requests per second\n", title, reqpersec); + printf("%s: %.2f requests per second\n", config.title, reqpersec); } } -static void prepareForBenchmark(void) -{ +static void prepareForBenchmark(char *title) { memset(config.latency,0,sizeof(int)*(MAX_LATENCY+1)); + config.title = title; config.start = mstime(); config.donerequests = 0; } -static void endBenchmark(char *title) { +static void endBenchmark(void) { config.totlatency = mstime()-config.start; - showLatencyReport(title); + showLatencyReport(); freeAllClients(); } @@ -498,6 +499,18 @@ void parseOptions(int argc, char **argv) { } } +int showThroughput(struct aeEventLoop *eventLoop, long long id, void *clientData) { + REDIS_NOTUSED(eventLoop); + REDIS_NOTUSED(id); + REDIS_NOTUSED(clientData); + + float dt = (float)(mstime()-config.start)/1000.0; + float rps = (float)config.donerequests/dt; + printf("%s: %.2f\r", config.title, rps); + fflush(stdout); + return 250; /* every 250ms */ +} + int main(int argc, char **argv) { client c; @@ -509,6 +522,7 @@ int main(int argc, char **argv) { config.requests = 10000; config.liveclients = 0; config.el = aeCreateEventLoop(); + aeCreateTimeEvent(config.el,1,showThroughput,NULL,NULL); config.keepalive = 1; config.donerequests = 0; config.datasize = 3; @@ -532,7 +546,7 @@ int main(int argc, char **argv) { if (config.idlemode) { printf("Creating %d idle connections and waiting forever (Ctrl+C when done)\n", config.numclients); - prepareForBenchmark(); + prepareForBenchmark("IDLE"); c = createClient(); if (!c) exit(1); c->obuf = sdsempty(); @@ -543,25 +557,25 @@ int main(int argc, char **argv) { } do { - prepareForBenchmark(); + 
prepareForBenchmark("PING"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"PING\r\n"); prepareClientForReply(c,REPLY_RETCODE); createMissingClients(c); aeMain(config.el); - endBenchmark("PING"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("PING (multi bulk)"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"*1\r\n$4\r\nPING\r\n"); prepareClientForReply(c,REPLY_RETCODE); createMissingClients(c); aeMain(config.el); - endBenchmark("PING (multi bulk)"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("SET"); c = createClient(); if (!c) exit(1); c->obuf = sdscatprintf(c->obuf,"SET foo_rand000000000000 %d\r\n",config.datasize); @@ -575,106 +589,106 @@ int main(int argc, char **argv) { prepareClientForReply(c,REPLY_RETCODE); createMissingClients(c); aeMain(config.el); - endBenchmark("SET"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("GET"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"GET foo_rand000000000000\r\n"); prepareClientForReply(c,REPLY_BULK); createMissingClients(c); aeMain(config.el); - endBenchmark("GET"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("INCR"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"INCR counter_rand000000000000\r\n"); prepareClientForReply(c,REPLY_INT); createMissingClients(c); aeMain(config.el); - endBenchmark("INCR"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("LPUSH"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"LPUSH mylist 3\r\nbar\r\n"); prepareClientForReply(c,REPLY_INT); createMissingClients(c); aeMain(config.el); - endBenchmark("LPUSH"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("LPOP"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"LPOP mylist\r\n"); prepareClientForReply(c,REPLY_BULK); createMissingClients(c); aeMain(config.el); - endBenchmark("LPOP"); + endBenchmark(); - prepareForBenchmark(); + 
prepareForBenchmark("SADD"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"SADD myset 24\r\ncounter_rand000000000000\r\n"); prepareClientForReply(c,REPLY_RETCODE); createMissingClients(c); aeMain(config.el); - endBenchmark("SADD"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("SPOP"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"SPOP myset\r\n"); prepareClientForReply(c,REPLY_BULK); createMissingClients(c); aeMain(config.el); - endBenchmark("SPOP"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("LPUSH (again, in order to bench LRANGE)"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"LPUSH mylist 3\r\nbar\r\n"); prepareClientForReply(c,REPLY_RETCODE); createMissingClients(c); aeMain(config.el); - endBenchmark("LPUSH (again, in order to bench LRANGE)"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("LRANGE (first 100 elements)"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"LRANGE mylist 0 99\r\n"); prepareClientForReply(c,REPLY_MBULK); createMissingClients(c); aeMain(config.el); - endBenchmark("LRANGE (first 100 elements)"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("LRANGE (first 300 elements)"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"LRANGE mylist 0 299\r\n"); prepareClientForReply(c,REPLY_MBULK); createMissingClients(c); aeMain(config.el); - endBenchmark("LRANGE (first 300 elements)"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("LRANGE (first 450 elements)"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"LRANGE mylist 0 449\r\n"); prepareClientForReply(c,REPLY_MBULK); createMissingClients(c); aeMain(config.el); - endBenchmark("LRANGE (first 450 elements)"); + endBenchmark(); - prepareForBenchmark(); + prepareForBenchmark("LRANGE (first 600 elements)"); c = createClient(); if (!c) exit(1); c->obuf = sdscat(c->obuf,"LRANGE mylist 0 599\r\n"); 
prepareClientForReply(c,REPLY_MBULK); createMissingClients(c); aeMain(config.el); - endBenchmark("LRANGE (first 600 elements)"); + endBenchmark(); printf("\n"); } while(config.loop); From 8fedd04dccd08e9a25652a22c2034d3f38f72d0f Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Aug 2010 11:37:17 +0200 Subject: [PATCH 075/139] Makefile deps updated --- src/Makefile | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5fe3971e..38007e8d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -33,6 +33,7 @@ CHECKAOFPRGNAME = redis-check-aof all: redis-server redis-benchmark redis-cli redis-check-dump redis-check-aof + # Deps (use make dep to generate this) adlist.o: adlist.c adlist.h zmalloc.h ae.o: ae.c ae.h zmalloc.h config.h ae_kqueue.c @@ -40,25 +41,61 @@ ae_epoll.o: ae_epoll.c ae_kqueue.o: ae_kqueue.c ae_select.o: ae_select.c anet.o: anet.c fmacros.h anet.h +aof.o: aof.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +config.o: config.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +db.o: db.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +debug.o: debug.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h sha1.h dict.o: dict.c fmacros.h dict.h zmalloc.h +intset.o: intset.c intset.h zmalloc.h linenoise.o: linenoise.c fmacros.h lzf_c.o: lzf_c.c lzfP.h lzf_d.o: lzf_d.c lzfP.h +multi.o: multi.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +networking.o: networking.c redis.h fmacros.h config.h ae.h sds.h dict.h \ + adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +object.o: object.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + 
zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h pqsort.o: pqsort.c +pubsub.o: pubsub.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +rdb.o: rdb.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h lzf.h redis-benchmark.o: redis-benchmark.c fmacros.h ae.h anet.h sds.h adlist.h \ zmalloc.h redis-check-aof.o: redis-check-aof.c fmacros.h config.h redis-check-dump.o: redis-check-dump.c lzf.h -redis-cli.o: redis-cli.c fmacros.h anet.h sds.h adlist.h zmalloc.h \ - linenoise.h -redis.o: redis.c fmacros.h config.h redis.h ae.h sds.h anet.h dict.h \ - adlist.h zmalloc.h lzf.h pqsort.h zipmap.h ziplist.h sha1.h +redis-cli.o: redis-cli.c fmacros.h version.h anet.h sds.h adlist.h \ + zmalloc.h linenoise.h +redis.o: redis.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h release.o: release.c release.h +replication.o: replication.c redis.h fmacros.h config.h ae.h sds.h dict.h \ + adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h sds.o: sds.c sds.h zmalloc.h sha1.o: sha1.c sha1.h +sort.o: sort.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h pqsort.h +t_hash.o: t_hash.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +t_list.o: t_list.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +t_set.o: t_set.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +t_string.o: t_string.c redis.h fmacros.h config.h ae.h sds.h dict.h \ + adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +t_zset.o: t_zset.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h 
+util.o: util.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h +vm.o: vm.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ + zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h ziplist.o: ziplist.c zmalloc.h ziplist.h zipmap.o: zipmap.c zmalloc.h -intset.o: intset.c zmalloc.h zmalloc.o: zmalloc.c config.h redis-server: $(OBJ) From e0e1c195202dd74ab22554dd4293672cc95368ee Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Aug 2010 11:51:45 +0200 Subject: [PATCH 076/139] Fixed MONITOR mode and Issue 296 --- src/db.c | 6 +++--- src/redis-cli.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/db.c b/src/db.c index 6d287d72..f380bf6e 100644 --- a/src/db.c +++ b/src/db.c @@ -221,19 +221,19 @@ void keysCommand(redisClient *c) { dictIterator *di; dictEntry *de; sds pattern = c->argv[1]->ptr; - int plen = sdslen(pattern); + int plen = sdslen(pattern), allkeys; unsigned long numkeys = 0; robj *lenobj = createObject(REDIS_STRING,NULL); di = dictGetIterator(c->db->dict); addReply(c,lenobj); decrRefCount(lenobj); + allkeys = (pattern[0] == '*' && pattern[1] == '\0'); while((de = dictNext(di)) != NULL) { sds key = dictGetEntryKey(de); robj *keyobj; - if ((pattern[0] == '*' && pattern[1] == '\0') || - stringmatchlen(pattern,plen,key,sdslen(key),0)) { + if (allkeys || stringmatchlen(pattern,plen,key,sdslen(key),0)) { keyobj = createStringObject(key,sdslen(key)); if (expireIfNeeded(c->db,keyobj) == 0) { addReplyBulk(c,keyobj); diff --git a/src/redis-cli.c b/src/redis-cli.c index 8b7d0777..3a6b0a90 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -96,7 +96,7 @@ static sds cliReadLine(int fd) { ssize_t ret; ret = read(fd,&c,1); - if (ret == -1) { + if (ret <= 0) { sdsfree(line); return NULL; } else if ((ret == 0) || (c == '\n')) { @@ -282,7 +282,8 @@ static int cliSendCommand(int argc, char **argv, int repeat) { while(repeat--) { anetWrite(fd,cmd,sdslen(cmd)); while 
(config.monitor_mode) { - cliReadSingleLineReply(fd,0); + if (cliReadSingleLineReply(fd,0)) exit(1); + printf("\n"); } if (config.pubsub_mode) { From 93b2a7718eefd73cdd1b3b221cfc38be83fca0b5 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Aug 2010 15:36:13 +0200 Subject: [PATCH 077/139] It is now possible to use authentication and DB selection options at the same time in redis-cli (Issue 298) --- src/redis-cli.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 3a6b0a90..0b2fd0c1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -478,10 +478,16 @@ int main(int argc, char **argv) { if (config.auth != NULL) { char *authargv[2]; + int dbnum = config.dbnum; + /* We need to save the real configured database number and set it to + * zero here, otherwise cliSendCommand() will try to perform the + * SELECT command before the authentication, and it will fail. */ + config.dbnum = 0; authargv[0] = "AUTH"; authargv[1] = config.auth; cliSendCommand(2, convertToSds(2, authargv), 1); + config.dbnum = dbnum; /* restore the right DB number */ } /* Start interactive mode when no command is provided */ From 8079656a8ea7c379815366c6f89f9954e86a57be Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Aug 2010 15:57:03 +0200 Subject: [PATCH 078/139] Now redis-cli replies to help showing some basic usage information (Issue 291) --- src/redis-cli.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 0b2fd0c1..761c025e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -251,12 +251,32 @@ static int selectDb(int fd) { return 0; } +static void showInteractiveHelp(void) { + printf( + "\n" + "Welcome to redis-cli " REDIS_VERSION "!\n" + "Just type any valid Redis command to see a pretty printed output.\n" + "\n" + "It is possible to quote strings, like in:\n" + " set \"my key\" \"some string \\xff\\n\"\n" + "\n" + "You can find a list of valid Redis commands at\n" + " 
http://code.google.com/p/redis/wiki/CommandReference\n" + "\n" + "Note: redis-cli supports line editing, use up/down arrows for history." + "\n\n"); +} + static int cliSendCommand(int argc, char **argv, int repeat) { char *command = argv[0]; int fd, j, retval = 0; sds cmd; config.raw_output = !strcasecmp(command,"info"); + if (!strcasecmp(command,"help")) { + showInteractiveHelp(); + return 0; + } if (!strcasecmp(command,"shutdown")) config.shutdown = 1; if (!strcasecmp(command,"monitor")) config.monitor_mode = 1; if (!strcasecmp(command,"subscribe") || From fb92ecece75ec48efb927fa6c2f2f86a58f73609 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 30 Aug 2010 16:31:03 +0200 Subject: [PATCH 079/139] BLPOP inside MULTI/EXEC block no longer crashes, instead if the list is empty the behavior is like if the timeout is reached. This fixes Issue 285 --- src/t_list.c | 7 +++++++ tests/unit/type/list.tcl | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/t_list.c b/src/t_list.c index 2a981033..6b4a611e 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -823,6 +823,13 @@ void blockingPopGenericCommand(redisClient *c, int where) { } } } + /* If we are inside a MULTI/EXEC and the list is empty the only thing + * we can do is treating it as a timeout (even with timeout 0). 
*/ + if (c->flags & REDIS_MULTI) { + addReply(c,shared.nullmultibulk); + return; + } + /* If the list is empty or the key does not exists we must block */ timeout = strtol(c->argv[c->argc-1]->ptr,NULL,10); if (timeout > 0) timeout += time(NULL); diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl index d3ed90ec..ca0da764 100644 --- a/tests/unit/type/list.tcl +++ b/tests/unit/type/list.tcl @@ -172,6 +172,17 @@ start_server { } } + test {BLPOP inside a transaction} { + r del xlist + r lpush xlist foo + r lpush xlist bar + r multi + r blpop xlist 0 + r blpop xlist 0 + r blpop xlist 0 + r exec + } {{xlist bar} {xlist foo} {}} + test {LPUSHX, RPUSHX - generic} { r del xlist assert_equal 0 [r lpushx xlist a] From 834ef78e27a8690a91d727259aaece611664a368 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 30 Aug 2010 14:44:34 +0200 Subject: [PATCH 080/139] Refactor reply buildup for speed on large multi bulk replies --- src/networking.c | 239 +++++++++++++++++++++++++++-------------------- src/object.c | 1 + src/redis.h | 15 +++ 3 files changed, 156 insertions(+), 99 deletions(-) diff --git a/src/networking.c b/src/networking.c index a39be7c4..da0cd0a1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1,5 +1,4 @@ #include "redis.h" - #include void *dupClientReplyValue(void *o) { @@ -12,7 +11,16 @@ int listMatchObjects(void *a, void *b) { } redisClient *createClient(int fd) { - redisClient *c = zmalloc(sizeof(*c)); + redisClient *c; + + /* Make sure to allocate a multiple of the page size to prevent wasting + * memory. A page size of 4096 is assumed here. We need to compensate + * for the zmalloc overhead of sizeof(size_t) bytes. 
*/ + size_t size = 8192-sizeof(size_t); + redisAssert(size > sizeof(redisClient)); + c = zmalloc(size); + c->buflen = size-sizeof(redisClient); + c->bufpos = 0; anetNonBlock(NULL,fd); anetTcpNoDelay(NULL,fd); @@ -53,70 +61,118 @@ redisClient *createClient(int fd) { return c; } -void addReply(redisClient *c, robj *obj) { - if (listLength(c->reply) == 0 && +int _ensureFileEvent(redisClient *c) { + if (c->bufpos == 0 && listLength(c->reply) == 0 && (c->replstate == REDIS_REPL_NONE || c->replstate == REDIS_REPL_ONLINE) && aeCreateFileEvent(server.el, c->fd, AE_WRITABLE, - sendReplyToClient, c) == AE_ERR) return; + sendReplyToClient, c) == AE_ERR) return REDIS_ERR; + return REDIS_OK; +} - if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) { - obj = dupStringObject(obj); - obj->refcount = 0; /* getDecodedObject() will increment the refcount */ +void _addReplyObjectToList(redisClient *c, robj *obj) { + redisAssert(obj->type == REDIS_STRING && + obj->encoding == REDIS_ENCODING_RAW); + listAddNodeTail(c->reply,obj); +} + +void _ensureBufferInReplyList(redisClient *c) { + sds buffer = sdsnewlen(NULL,REDIS_REPLY_CHUNK_SIZE); + sdsupdatelen(buffer); /* sdsnewlen expects non-empty string */ + listAddNodeTail(c->reply,createObject(REDIS_REPLY_NODE,buffer)); +} + +void _addReplyStringToBuffer(redisClient *c, char *s, size_t len) { + size_t available = 0; + redisAssert(len < REDIS_REPLY_CHUNK_THRESHOLD); + if (listLength(c->reply) > 0) { + robj *o = listNodeValue(listLast(c->reply)); + + /* Make sure to append to a reply node with enough bytes available. 
*/ + if (o->type == REDIS_REPLY_NODE) available = sdsavail(o->ptr); + if (o->type != REDIS_REPLY_NODE || len > available) { + _ensureBufferInReplyList(c); + _addReplyStringToBuffer(c,s,len); + } else { + o->ptr = sdscatlen(o->ptr,s,len); + } + } else { + available = c->buflen-c->bufpos; + if (len > available) { + _ensureBufferInReplyList(c); + _addReplyStringToBuffer(c,s,len); + } else { + memcpy(c->buf+c->bufpos,s,len); + c->bufpos += len; + } + } +} + +void addReply(redisClient *c, robj *obj) { + if (_ensureFileEvent(c) != REDIS_OK) return; + if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) { + /* Returns a new object with refcount 1 */ + obj = dupStringObject(obj); + } else { + /* This increments the refcount. */ + obj = getDecodedObject(obj); + } + + if (sdslen(obj->ptr) < REDIS_REPLY_CHUNK_THRESHOLD) { + _addReplyStringToBuffer(c,obj->ptr,sdslen(obj->ptr)); + decrRefCount(obj); + } else { + _addReplyObjectToList(c,obj); } - listAddNodeTail(c->reply,getDecodedObject(obj)); } void addReplySds(redisClient *c, sds s) { - robj *o = createObject(REDIS_STRING,s); - addReply(c,o); - decrRefCount(o); + if (_ensureFileEvent(c) != REDIS_OK) return; + if (sdslen(s) < REDIS_REPLY_CHUNK_THRESHOLD) { + _addReplyStringToBuffer(c,s,sdslen(s)); + sdsfree(s); + } else { + _addReplyObjectToList(c,createObject(REDIS_STRING,s)); + } +} + +void addReplyString(redisClient *c, char *s, size_t len) { + if (_ensureFileEvent(c) != REDIS_OK) return; + if (len < REDIS_REPLY_CHUNK_THRESHOLD) { + _addReplyStringToBuffer(c,s,len); + } else { + _addReplyObjectToList(c,createStringObject(s,len)); + } } void addReplyDouble(redisClient *c, double d) { - char buf[128]; - - snprintf(buf,sizeof(buf),"%.17g",d); - addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", - (unsigned long) strlen(buf),buf)); + char dbuf[128], sbuf[128]; + int dlen, slen; + dlen = snprintf(dbuf,sizeof(dbuf),"%.17g",d); + slen = snprintf(sbuf,sizeof(sbuf),"$%d\r\n%s\r\n",dlen,dbuf); + addReplyString(c,sbuf,slen); 
} -void addReplyLongLong(redisClient *c, long long ll) { +void _addReplyLongLong(redisClient *c, long long ll, char prefix) { char buf[128]; - size_t len; - - if (ll == 0) { - addReply(c,shared.czero); - return; - } else if (ll == 1) { - addReply(c,shared.cone); - return; - } - buf[0] = ':'; + int len; + buf[0] = prefix; len = ll2string(buf+1,sizeof(buf)-1,ll); buf[len+1] = '\r'; buf[len+2] = '\n'; - addReplySds(c,sdsnewlen(buf,len+3)); + addReplyString(c,buf,len+3); +} + +void addReplyLongLong(redisClient *c, long long ll) { + _addReplyLongLong(c,ll,':'); } void addReplyUlong(redisClient *c, unsigned long ul) { - char buf[128]; - size_t len; - - if (ul == 0) { - addReply(c,shared.czero); - return; - } else if (ul == 1) { - addReply(c,shared.cone); - return; - } - len = snprintf(buf,sizeof(buf),":%lu\r\n",ul); - addReplySds(c,sdsnewlen(buf,len)); + _addReplyLongLong(c,(long long)ul,':'); } void addReplyBulkLen(redisClient *c, robj *obj) { - size_t len, intlen; - char buf[128]; + size_t len; if (obj->encoding == REDIS_ENCODING_RAW) { len = sdslen(obj->ptr); @@ -133,11 +189,7 @@ void addReplyBulkLen(redisClient *c, robj *obj) { len++; } } - buf[0] = '$'; - intlen = ll2string(buf+1,sizeof(buf)-1,(long long)len); - buf[intlen+1] = '\r'; - buf[intlen+2] = '\n'; - addReplySds(c,sdsnewlen(buf,intlen+3)); + _addReplyLongLong(c,len,'$'); } void addReplyBulk(redisClient *c, robj *obj) { @@ -287,34 +339,6 @@ void freeClient(redisClient *c) { zfree(c); } -#define GLUEREPLY_UP_TO (1024) -static void glueReplyBuffersIfNeeded(redisClient *c) { - int copylen = 0; - char buf[GLUEREPLY_UP_TO]; - listNode *ln; - listIter li; - robj *o; - - listRewind(c->reply,&li); - while((ln = listNext(&li))) { - int objlen; - - o = ln->value; - objlen = sdslen(o->ptr); - if (copylen + objlen <= GLUEREPLY_UP_TO) { - memcpy(buf+copylen,o->ptr,objlen); - copylen += objlen; - listDelNode(c->reply,ln); - } else { - if (copylen == 0) return; - break; - } - } - /* Now the output buffer is empty, add the 
new single element */ - o = createObject(REDIS_STRING,sdsnewlen(buf,copylen)); - listAddNodeHead(c->reply,o); -} - void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { redisClient *c = privdata; int nwritten = 0, totwritten = 0, objlen; @@ -331,31 +355,48 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { return; } - while(listLength(c->reply)) { - if (server.glueoutputbuf && listLength(c->reply) > 1) - glueReplyBuffersIfNeeded(c); + while(c->bufpos > 0 || listLength(c->reply)) { + if (c->bufpos > 0) { + if (c->flags & REDIS_MASTER) { + /* Don't reply to a master */ + nwritten = c->bufpos - c->sentlen; + } else { + nwritten = write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen); + if (nwritten <= 0) break; + } + c->sentlen += nwritten; + totwritten += nwritten; - o = listNodeValue(listFirst(c->reply)); - objlen = sdslen(o->ptr); - - if (objlen == 0) { - listDelNode(c->reply,listFirst(c->reply)); - continue; - } - - if (c->flags & REDIS_MASTER) { - /* Don't reply to a master */ - nwritten = objlen - c->sentlen; + /* If the buffer was sent, set bufpos to zero to continue with + * the remainder of the reply. 
*/ + if (c->sentlen == c->bufpos) { + c->bufpos = 0; + c->sentlen = 0; + } } else { - nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen); - if (nwritten <= 0) break; - } - c->sentlen += nwritten; - totwritten += nwritten; - /* If we fully sent the object on head go to the next one */ - if (c->sentlen == objlen) { - listDelNode(c->reply,listFirst(c->reply)); - c->sentlen = 0; + o = listNodeValue(listFirst(c->reply)); + objlen = sdslen(o->ptr); + + if (objlen == 0) { + listDelNode(c->reply,listFirst(c->reply)); + continue; + } + + if (c->flags & REDIS_MASTER) { + /* Don't reply to a master */ + nwritten = objlen - c->sentlen; + } else { + nwritten = write(fd, ((char*)o->ptr)+c->sentlen,objlen-c->sentlen); + if (nwritten <= 0) break; + } + c->sentlen += nwritten; + totwritten += nwritten; + + /* If we fully sent the object on head go to the next one */ + if (c->sentlen == objlen) { + listDelNode(c->reply,listFirst(c->reply)); + c->sentlen = 0; + } } /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT * bytes, in a single threaded server it's a good idea to serve diff --git a/src/object.c b/src/object.c index 92af1d6a..5e8dbfa2 100644 --- a/src/object.c +++ b/src/object.c @@ -196,6 +196,7 @@ void decrRefCount(void *obj) { case REDIS_SET: freeSetObject(o); break; case REDIS_ZSET: freeZsetObject(o); break; case REDIS_HASH: freeHashObject(o); break; + case REDIS_REPLY_NODE: freeStringObject(o); break; default: redisPanic("Unknown object type"); break; } o->ptr = NULL; /* defensive programming. We'll see NULL in traces. 
*/ diff --git a/src/redis.h b/src/redis.h index 9e27d724..e2f69454 100644 --- a/src/redis.h +++ b/src/redis.h @@ -48,6 +48,15 @@ #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ #define REDIS_SHARED_INTEGERS 10000 +/* Size of a reply chunk, configured to exactly allocate 4k bytes */ +#define REDIS_REPLY_CHUNK_BYTES (4*1024) +#define REDIS_REPLY_CHUNK_SIZE (REDIS_REPLY_CHUNK_BYTES-sizeof(struct sdshdr)-1-sizeof(size_t)) +/* It doesn't make sense to memcpy objects to a chunk when the net result is + * not being able to glue other objects. We want to make sure it can be glued + * to at least a bulk length or \r\n, so set the threshold to be a couple + * of bytes less than the size of the buffer. */ +#define REDIS_REPLY_CHUNK_THRESHOLD (REDIS_REPLY_CHUNK_SIZE-16) + /* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */ #define REDIS_WRITEV_THRESHOLD 3 /* Max number of iovecs used for each writev call */ @@ -72,6 +81,7 @@ #define REDIS_SET 2 #define REDIS_ZSET 3 #define REDIS_HASH 4 +#define REDIS_REPLY_NODE 5 #define REDIS_VMPOINTER 8 /* Objects encoding. 
Some kind of objects like Strings and Hashes can be @@ -309,6 +319,11 @@ typedef struct redisClient { list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */ dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */ list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */ + + /* Response buffer */ + int bufpos; + int buflen; + char buf[]; } redisClient; struct saveparam { From b301c1fc2bbf977a7d9fd4718cd9914113541c75 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 30 Aug 2010 16:02:06 +0200 Subject: [PATCH 081/139] Wrapper for adding unknown multi bulk length to reply list --- src/config.c | 7 ++----- src/db.c | 6 ++---- src/networking.c | 28 ++++++++++++++++++++++++++++ src/redis.h | 2 ++ src/t_hash.c | 11 ++++------- src/t_set.c | 9 ++++----- src/t_zset.c | 12 +++++------- 7 files changed, 47 insertions(+), 28 deletions(-) diff --git a/src/config.c b/src/config.c index e1b743db..5c449886 100644 --- a/src/config.c +++ b/src/config.c @@ -332,13 +332,10 @@ badfmt: /* Bad format errors */ void configGetCommand(redisClient *c) { robj *o = getDecodedObject(c->argv[2]); - robj *lenobj = createObject(REDIS_STRING,NULL); + void *replylen = addDeferredMultiBulkLength(c); char *pattern = o->ptr; int matches = 0; - addReply(c,lenobj); - decrRefCount(lenobj); - if (stringmatch(pattern,"dbfilename",0)) { addReplyBulkCString(c,"dbfilename"); addReplyBulkCString(c,server.dbfilename); @@ -410,7 +407,7 @@ void configGetCommand(redisClient *c) { matches++; } decrRefCount(o); - lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",matches*2); + setDeferredMultiBulkLength(c,replylen,matches*2); } void configCommand(redisClient *c) { diff --git a/src/db.c b/src/db.c index 6d287d72..8c6c6bc8 100644 --- a/src/db.c +++ b/src/db.c @@ -223,11 +223,9 @@ void keysCommand(redisClient *c) { sds pattern = c->argv[1]->ptr; int plen = sdslen(pattern); unsigned long numkeys = 0; - robj *lenobj = createObject(REDIS_STRING,NULL); + void *replylen = 
addDeferredMultiBulkLength(c); di = dictGetIterator(c->db->dict); - addReply(c,lenobj); - decrRefCount(lenobj); while((de = dictNext(di)) != NULL) { sds key = dictGetEntryKey(de); robj *keyobj; @@ -243,7 +241,7 @@ void keysCommand(redisClient *c) { } } dictReleaseIterator(di); - lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",numkeys); + setDeferredMultiBulkLength(c,replylen,numkeys); } void dbsizeCommand(redisClient *c) { diff --git a/src/networking.c b/src/networking.c index da0cd0a1..464d5e02 100644 --- a/src/networking.c +++ b/src/networking.c @@ -145,6 +145,34 @@ void addReplyString(redisClient *c, char *s, size_t len) { } } +/* Adds an empty object to the reply list that will contain the multi bulk + * length, which is not known when this function is called. */ +void *addDeferredMultiBulkLength(redisClient *c) { + if (_ensureFileEvent(c) != REDIS_OK) return NULL; + _addReplyObjectToList(c,createObject(REDIS_STRING,NULL)); + return listLast(c->reply); +} + +/* Populate the length object and try glueing it to the next chunk. */ +void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { + listNode *ln = (listNode*)node; + robj *len, *next; + + /* Abort when *node is NULL (see addDeferredMultiBulkLength). */ + if (node == NULL) return; + + len = listNodeValue(ln); + len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length); + if (ln->next != NULL) { + next = listNodeValue(ln->next); + /* Only glue when the next node is a reply chunk. 
*/ + if (next->type == REDIS_REPLY_NODE) { + len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr)); + listDelNode(c->reply,ln->next); + } + } +} + void addReplyDouble(redisClient *c, double d) { char dbuf[128], sbuf[128]; int dlen, slen; diff --git a/src/redis.h b/src/redis.h index e2f69454..752d56c3 100644 --- a/src/redis.h +++ b/src/redis.h @@ -603,6 +603,8 @@ void resetClient(redisClient *c); void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask); void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask); void addReply(redisClient *c, robj *obj); +void *addDeferredMultiBulkLength(redisClient *c); +void setDeferredMultiBulkLength(redisClient *c, void *node, long length); void addReplySds(redisClient *c, sds s); void processInputBuffer(redisClient *c); void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); diff --git a/src/t_hash.c b/src/t_hash.c index b6be284f..c8be72f2 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -350,17 +350,15 @@ void hlenCommand(redisClient *c) { } void genericHgetallCommand(redisClient *c, int flags) { - robj *o, *lenobj, *obj; + robj *o, *obj; unsigned long count = 0; hashTypeIterator *hi; + void *replylen = NULL; if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL || checkType(c,o,REDIS_HASH)) return; - lenobj = createObject(REDIS_STRING,NULL); - addReply(c,lenobj); - decrRefCount(lenobj); - + replylen = addDeferredMultiBulkLength(c); hi = hashTypeInitIterator(o); while (hashTypeNext(hi) != REDIS_ERR) { if (flags & REDIS_HASH_KEY) { @@ -377,8 +375,7 @@ void genericHgetallCommand(redisClient *c, int flags) { } } hashTypeReleaseIterator(hi); - - lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",count); + setDeferredMultiBulkLength(c,replylen,count); } void hkeysCommand(redisClient *c) { diff --git a/src/t_set.c b/src/t_set.c index 68e13227..d6041e72 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -320,7 +320,8 @@ int qsortCompareSetsByCardinality(const void 
*s1, const void *s2) { void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum, robj *dstkey) { robj **sets = zmalloc(sizeof(robj*)*setnum); setTypeIterator *si; - robj *ele, *lenobj = NULL, *dstset = NULL; + robj *ele, *dstset = NULL; + void *replylen = NULL; unsigned long j, cardinality = 0; for (j = 0; j < setnum; j++) { @@ -356,9 +357,7 @@ void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum, * to the output list and save the pointer to later modify it with the * right length */ if (!dstkey) { - lenobj = createObject(REDIS_STRING,NULL); - addReply(c,lenobj); - decrRefCount(lenobj); + replylen = addDeferredMultiBulkLength(c); } else { /* If we have a target key where to store the resulting set * create this key with an empty set inside */ @@ -400,7 +399,7 @@ void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum, touchWatchedKey(c->db,dstkey); server.dirty++; } else { - lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",cardinality); + setDeferredMultiBulkLength(c,replylen,cardinality); } zfree(sets); } diff --git a/src/t_zset.c b/src/t_zset.c index e93e5c40..d25b1a66 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -866,7 +866,8 @@ void genericZrangebyscoreCommand(redisClient *c, int justcount) { zset *zsetobj = o->ptr; zskiplist *zsl = zsetobj->zsl; zskiplistNode *ln; - robj *ele, *lenobj = NULL; + robj *ele; + void *replylen = NULL; unsigned long rangelen = 0; /* Get the first node with the score >= min, or with @@ -884,11 +885,8 @@ void genericZrangebyscoreCommand(redisClient *c, int justcount) { * are in the list, so we push this object that will represent * the multi-bulk length in the output buffer, and will "fix" * it later */ - if (!justcount) { - lenobj = createObject(REDIS_STRING,NULL); - addReply(c,lenobj); - decrRefCount(lenobj); - } + if (!justcount) + replylen = addDeferredMultiBulkLength(c); while(ln && (maxex ? 
(ln->score < max) : (ln->score <= max))) { if (offset) { @@ -910,7 +908,7 @@ void genericZrangebyscoreCommand(redisClient *c, int justcount) { if (justcount) { addReplyLongLong(c,(long)rangelen); } else { - lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n", + setDeferredMultiBulkLength(c,replylen, withscores ? (rangelen*2) : rangelen); } } From 57b0738011007e47ebe25d5c81acfe333c561e02 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 30 Aug 2010 16:51:39 +0200 Subject: [PATCH 082/139] Don't build a reply when replaying the AOF --- src/aof.c | 8 +++++--- src/networking.c | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/aof.c b/src/aof.c index 8f2dc96f..1ed2363a 100644 --- a/src/aof.c +++ b/src/aof.c @@ -272,12 +272,14 @@ int loadAppendOnlyFile(char *filename) { fakeClient->argc = argc; fakeClient->argv = argv; cmd->proc(fakeClient); - /* Discard the reply objects list from the fake client */ - while(listLength(fakeClient->reply)) - listDelNode(fakeClient->reply,listFirst(fakeClient->reply)); + + /* The fake client should not have a reply */ + redisAssert(fakeClient->bufpos == 0 && listLength(fakeClient->reply) == 0); + /* Clean up, ready for the next command */ for (j = 0; j < argc; j++) decrRefCount(argv[j]); zfree(argv); + /* Handle swapping while loading big datasets when VM is on */ force_swapout = 0; if ((zmalloc_used_memory() - server.vm_max_memory) > 1024*1024*32) diff --git a/src/networking.c b/src/networking.c index 464d5e02..971cbfc1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -62,6 +62,7 @@ redisClient *createClient(int fd) { } int _ensureFileEvent(redisClient *c) { + if (c->fd <= 0) return REDIS_ERR; if (c->bufpos == 0 && listLength(c->reply) == 0 && (c->replstate == REDIS_REPL_NONE || c->replstate == REDIS_REPL_ONLINE) && From 1eb13e4913622927b17b9c2922754f864d520710 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 31 Aug 2010 09:37:25 +0200 Subject: [PATCH 083/139] Fix set tests to make sets have a 
deterministic encoding --- tests/unit/type/set.tcl | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index 056ed27c..0f9f6abe 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -106,14 +106,17 @@ start_server { } r sadd set5 0 - # it is possible that a hashtable encoded only contains integers, - # because it is converted from an intset to a hashtable when a - # non-integer element is added and then removed. + # To make sure the sets are encoded as the type we are testing -- also + # when the VM is enabled and the values may be swapped in and out + # while the tests are running -- an extra element is added to every + # set that determines its encoding. + set large 200 if {$type eq "hashtable"} { - for {set i 1} {$i <= 5} {incr i} { - r sadd [format "set%d" $i] foo - r srem [format "set%d" $i] foo - } + set large foo + } + + for {set i 1} {$i <= 5} {incr i} { + r sadd [format "set%d" $i] $large } test "Generated sets must be encoded as $type" { @@ -123,20 +126,20 @@ start_server { } test "SINTER with two sets - $type" { - assert_equal {195 196 197 198 199} [lsort [r sinter set1 set2]] + assert_equal [list 195 196 197 198 199 $large] [lsort [r sinter set1 set2]] } test "SINTERSTORE with two sets - $type" { r sinterstore setres set1 set2 - assert_encoding intset setres - assert_equal {195 196 197 198 199} [lsort [r smembers setres]] + assert_encoding $type setres + assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres]] } test "SINTERSTORE with two sets, after a DEBUG RELOAD - $type" { r debug reload r sinterstore setres set1 set2 - assert_encoding intset setres - assert_equal {195 196 197 198 199} [lsort [r smembers setres]] + assert_encoding $type setres + assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres]] } test "SUNION with two sets - $type" { @@ -146,18 +149,18 @@ start_server { test "SUNIONSTORE 
with two sets - $type" { r sunionstore setres set1 set2 - assert_encoding intset setres + assert_encoding $type setres set expected [lsort -uniq "[r smembers set1] [r smembers set2]"] assert_equal $expected [lsort [r smembers setres]] } test "SINTER against three sets - $type" { - assert_equal {195 199} [lsort [r sinter set1 set2 set3]] + assert_equal [list 195 199 $large] [lsort [r sinter set1 set2 set3]] } test "SINTERSTORE with three sets - $type" { r sinterstore setres set1 set2 set3 - assert_equal {195 199} [r smembers setres] + assert_equal [list 195 199 $large] [lsort [r smembers setres]] } test "SUNION with non existing keys - $type" { @@ -175,7 +178,9 @@ start_server { test "SDIFFSTORE with three sets - $type" { r sdiffstore setres set1 set4 set5 - assert_encoding intset setres + # The type is determined by type of the first key to diff against. + # See the implementation for more information. + assert_encoding $type setres assert_equal {1 2 3 4} [lsort [r smembers setres]] } } From 7b30cc3a7bed6ea1d5b4131f977d554d78791bf7 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 31 Aug 2010 10:21:35 +0200 Subject: [PATCH 084/139] Fix issue 300 by upgrading variable types to 64-bit --- src/redis-check-dump.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/redis-check-dump.c b/src/redis-check-dump.c index 0b002790..a7e85973 100644 --- a/src/redis-check-dump.c +++ b/src/redis-check-dump.c @@ -65,8 +65,8 @@ /* data type to hold offset in file and size */ typedef struct { void *data; - unsigned long size; - unsigned long offset; + uint64_t size; + uint64_t offset; } pos; static unsigned char level = 0; @@ -77,8 +77,8 @@ static pos positions[16]; /* Hold a stack of errors */ typedef struct { char error[16][1024]; - unsigned long offset[16]; - unsigned int level; + uint64_t offset[16]; + uint32_t level; } errors_t; static errors_t errors; @@ -494,15 +494,15 @@ void printCentered(int indent, int width, char* 
body) { printf("%s %s %s\n", head, body, tail); } -void printValid(int ops, int bytes) { +void printValid(uint64_t ops, uint64_t bytes) { char body[80]; - sprintf(body, "Processed %d valid opcodes (in %d bytes)", ops, bytes); + sprintf(body, "Processed %llu valid opcodes (in %llu bytes)", ops, bytes); printCentered(4, 80, body); } -void printSkipped(int bytes, int offset) { +void printSkipped(uint64_t bytes, uint64_t offset) { char body[80]; - sprintf(body, "Skipped %d bytes (resuming at 0x%08x)", bytes, offset); + sprintf(body, "Skipped %llu bytes (resuming at 0x%08llx)", bytes, offset); printCentered(4, 80, body); } @@ -536,12 +536,12 @@ void printErrorStack(entry *e) { /* display error stack */ for (i = 0; i < errors.level; i++) { - printf("0x%08lx - %s\n", errors.offset[i], errors.error[i]); + printf("0x%08llx - %s\n", errors.offset[i], errors.error[i]); } } void process() { - int i, num_errors = 0, num_valid_ops = 0, num_valid_bytes = 0; + uint64_t num_errors = 0, num_valid_ops = 0, num_valid_bytes = 0; entry entry; processHeader(); @@ -558,7 +558,9 @@ void process() { num_valid_bytes = 0; /* search for next valid entry */ - unsigned long offset = positions[0].offset + 1; + uint64_t offset = positions[0].offset + 1; + int i = 0; + while (!entry.success && offset < positions[0].size) { positions[1].offset = offset; @@ -606,9 +608,9 @@ void process() { } /* print summary on errors */ - if (num_errors > 0) { + if (num_errors) { printf("\n"); - printf("Total unprocessable opcodes: %d\n", num_errors); + printf("Total unprocessable opcodes: %llu\n", num_errors); } } @@ -620,7 +622,7 @@ int main(int argc, char **argv) { } int fd; - unsigned long size; + size_t size; struct stat stat; void *data; From 08f55b786b58b3e0f8310e02683a0e1761c5b6bf Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 31 Aug 2010 11:17:06 +0200 Subject: [PATCH 085/139] faster server starting in Redis tests --- tests/support/server.tcl | 29 +++++++++++++++++++++++++---- tests/unit/other.tcl | 2 
+- tests/unit/protocol.tcl | 2 +- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/tests/support/server.tcl b/tests/support/server.tcl index 24fef467..e5ca6c6c 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -83,7 +83,9 @@ proc ping_server {host port} { } close $fd } e]} { - puts "Can't PING server at $host:$port... $e" + puts -nonewline "." + } else { + puts -nonewline "ok" } return $retval } @@ -170,14 +172,33 @@ proc start_server {options {code undefined}} { if {$::valgrind} { exec valgrind src/redis-server $config_file > $stdout 2> $stderr & - after 2000 } else { exec src/redis-server $config_file > $stdout 2> $stderr & - after 500 } # check that the server actually started - if {$code ne "undefined" && ![ping_server $::host $::port]} { + # ugly but tries to be as fast as possible... + set retrynum 20 + set serverisup 0 + + puts -nonewline "=== ($tags) Starting server ${::host}:${::port} " + after 10 + if {$code ne "undefined"} { + while {[incr retrynum -1]} { + catch { + if {[ping_server $::host $::port]} { + set serverisup 1 + } + } + if {$serverisup} break + after 50 + } + } else { + set serverisup 1 + } + puts {} + + if {!$serverisup} { error_and_quit $config_file [exec cat $stderr] } diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index f0497b62..5967c722 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -1,4 +1,4 @@ -start_server {} { +start_server {tags {"other"}} { test {SAVE - make sure there are all the types as values} { # Wait for a background saving in progress to terminate waitForBgsave r diff --git a/tests/unit/protocol.tcl b/tests/unit/protocol.tcl index 9eebf77f..5bf42d7f 100644 --- a/tests/unit/protocol.tcl +++ b/tests/unit/protocol.tcl @@ -1,4 +1,4 @@ -start_server {} { +start_server {tags {"protocol"}} { test {Handle an empty query well} { set fd [r channel] puts -nonewline $fd "\r\n" From d320764706cce1b4339043eb2ee9240d5fe3f2d2 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 31 
Aug 2010 11:42:52 +0200 Subject: [PATCH 086/139] We finally have an half decent README! (Issue 277) --- README | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/README b/README index a810a7c0..5eeabf74 100644 --- a/README +++ b/README @@ -1 +1,68 @@ -Check the 'doc' directory. doc/README.html is a good starting point :) +Where to find complete Redis documentation? +------------------------------------------- + +This README is just a fast "quick start" document. You can find more detailed +documentation here: + +1) http://code.google.com/p/redis +2) Check the 'doc' directory. doc/README.html is a good starting point :) + +Building Redis +-------------- + +It is as simple as: + + % make + +Redis is just a single binary, but if you want to install it you can use +the "make install" target that will copy the binary in /usr/local/bin +for default. + +You can run a 32 bit Redis binary using: + + % make 32bit + +After you build Redis is a good idea to test it, using: + + % make test + +Running Redis +------------- + +To run Redis with the default configuration just type: + + % cd src + % ./redis-server + +If you want to provide your redis.conf, you have to run it using an additional +parameter (the path of the configuration file): + + % cd src + % ./redis-server /path/to/redis.conf + +Playing with Redis +------------------ + +You can use redis-cli to play with Redis. Start a redis-server instance, +then in another terminal try the following: + + % cd src + % ./redis-cli + redis> ping + PONG + redis> set foo bar + OK + redis> get foo + "bar" + redis> incr mycounter + (integer) 1 + redis> incr mycounter + (integer) 2 + redis> + +You can find the list of all the available commands here: + + http://code.google.com/p/redis/wiki/CommandReference + +Enjoy! 
+ From f85202c3dc6213bffdc3ccf998e4ea85a36ad9a8 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 31 Aug 2010 13:06:26 +0200 Subject: [PATCH 087/139] Fix compilation errors and add warning for 32-bit platforms --- src/redis-check-dump.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/redis-check-dump.c b/src/redis-check-dump.c index a7e85973..959ecf8d 100644 --- a/src/redis-check-dump.c +++ b/src/redis-check-dump.c @@ -65,8 +65,8 @@ /* data type to hold offset in file and size */ typedef struct { void *data; - uint64_t size; - uint64_t offset; + size_t size; + size_t offset; } pos; static unsigned char level = 0; @@ -77,8 +77,8 @@ static pos positions[16]; /* Hold a stack of errors */ typedef struct { char error[16][1024]; - uint64_t offset[16]; - uint32_t level; + size_t offset[16]; + size_t level; } errors_t; static errors_t errors; @@ -112,7 +112,7 @@ int readBytes(void *target, long num) { if (p.offset + num > p.size) { return 0; } else { - memcpy(target, (void*)((unsigned long)p.data + p.offset), num); + memcpy(target, (void*)((size_t)p.data + p.offset), num); if (!peek) positions[level].offset += num; } return 1; @@ -536,7 +536,7 @@ void printErrorStack(entry *e) { /* display error stack */ for (i = 0; i < errors.level; i++) { - printf("0x%08llx - %s\n", errors.offset[i], errors.error[i]); + printf("0x%08lx - %s\n", errors.offset[i], errors.error[i]); } } @@ -622,7 +622,7 @@ int main(int argc, char **argv) { } int fd; - size_t size; + off_t size; struct stat stat; void *data; @@ -636,6 +636,10 @@ int main(int argc, char **argv) { size = stat.st_size; } + if (sizeof(size_t) == sizeof(int32_t) && size >= INT_MAX) { + ERROR("Cannot check dump files >2GB on a 32-bit platform\n"); + } + data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); if (data == MAP_FAILED) { ERROR("Cannot mmap: %s\n", argv[1]); From dbebd395ebc897275ec84edb143b1067aef8d882 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 31 Aug 2010 
18:34:34 +0200 Subject: [PATCH 088/139] Version is now 2.1.4 -- AKA 2.2-alpha1 --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index b570fe04..80decef1 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define REDIS_VERSION "2.1.3" +#define REDIS_VERSION "2.1.4" From a047bf52a4fed963a434c6e7e7376880cf17abcf Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 1 Sep 2010 18:31:30 +0200 Subject: [PATCH 089/139] fixed a few harmless warnings complining on Linux --- src/redis-check-dump.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/redis-check-dump.c b/src/redis-check-dump.c index 959ecf8d..987e1db3 100644 --- a/src/redis-check-dump.c +++ b/src/redis-check-dump.c @@ -496,13 +496,15 @@ void printCentered(int indent, int width, char* body) { void printValid(uint64_t ops, uint64_t bytes) { char body[80]; - sprintf(body, "Processed %llu valid opcodes (in %llu bytes)", ops, bytes); + sprintf(body, "Processed %llu valid opcodes (in %llu bytes)", + (unsigned long long) ops, (unsigned long long) bytes); printCentered(4, 80, body); } void printSkipped(uint64_t bytes, uint64_t offset) { char body[80]; - sprintf(body, "Skipped %llu bytes (resuming at 0x%08llx)", bytes, offset); + sprintf(body, "Skipped %llu bytes (resuming at 0x%08llx)", + (unsigned long long) bytes, (unsigned long long) offset); printCentered(4, 80, body); } @@ -610,7 +612,8 @@ void process() { /* print summary on errors */ if (num_errors) { printf("\n"); - printf("Total unprocessable opcodes: %llu\n", num_errors); + printf("Total unprocessable opcodes: %llu\n", + (unsigned long long) num_errors); } } From eddb388ef90258be406bdf1355f7c65bdd71bbe8 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 2 Sep 2010 10:34:39 +0200 Subject: [PATCH 090/139] memory fragmentation ratio in INFO output --- src/config.h | 5 +++++ src/redis.c | 2 ++ src/zmalloc.c | 42 ++++++++++++++++++++++++++++++++++++++++++ src/zmalloc.h 
| 1 + 4 files changed, 50 insertions(+) diff --git a/src/config.h b/src/config.h index 6e98fbb2..acc95cf5 100644 --- a/src/config.h +++ b/src/config.h @@ -21,6 +21,11 @@ #define redis_stat stat #endif +/* test for proc filesystem */ +#ifdef __linux__ +#define HAVE_PROCFS 1 +#endif + /* test for backtrace() */ #if defined(__APPLE__) || defined(__linux__) #define HAVE_BACKTRACE 1 diff --git a/src/redis.c b/src/redis.c index 77e67c58..8206b5d3 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1166,6 +1166,7 @@ sds genRedisInfoString(void) { "blocked_clients:%d\r\n" "used_memory:%zu\r\n" "used_memory_human:%s\r\n" + "mem_fragmentation_ratio:%.2f\r\n" "changes_since_last_save:%lld\r\n" "bgsave_in_progress:%d\r\n" "last_save_time:%ld\r\n" @@ -1192,6 +1193,7 @@ sds genRedisInfoString(void) { server.blpop_blocked_clients, zmalloc_used_memory(), hmem, + zmalloc_get_fragmentation_ratio(), server.dirty, server.bgsavechildpid != -1, server.lastsave, diff --git a/src/zmalloc.c b/src/zmalloc.c index 5c1b5e9a..81fc4c04 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -32,6 +32,10 @@ #include #include #include +#include +#include +#include +#include #include "config.h" #if defined(__sun) @@ -170,3 +174,41 @@ size_t zmalloc_used_memory(void) { void zmalloc_enable_thread_safeness(void) { zmalloc_thread_safe = 1; } + +/* Fragmentation = RSS / allocated-bytes */ +float zmalloc_get_fragmentation_ratio(void) { +#ifdef HAVE_PROCFS + size_t allocated = zmalloc_used_memory(); + int page = sysconf(_SC_PAGESIZE); + size_t rss; + char buf[4096]; + char filename[256]; + int fd, count; + char *p, *x; + + snprintf(filename,256,"/proc/%d/stat",getpid()); + if ((fd = open(filename,O_RDONLY)) == -1) return 0; + if (read(fd,buf,4096) <= 0) { + close(fd); + return 0; + } + close(fd); + + p = buf; + count = 23; /* RSS is the 24th field in /proc//stat */ + while(p && count--) { + p = strchr(p,' '); + if (p) p++; + } + if (!p) return 0; + x = strchr(p,' '); + if (!x) return 0; + *x = '\0'; + + rss = 
strtoll(p,NULL,10); + rss *= page; + return (float)rss/allocated; +#else + return 0; +#endif +} diff --git a/src/zmalloc.h b/src/zmalloc.h index db858bba..281aa3a8 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -38,5 +38,6 @@ void zfree(void *ptr); char *zstrdup(const char *s); size_t zmalloc_used_memory(void); void zmalloc_enable_thread_safeness(void); +float zmalloc_get_fragmentation_ratio(void); #endif /* _ZMALLOC_H */ From 73db2acc374c99ca8224e44a7383f69e7ca24a4f Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 2 Sep 2010 10:57:58 +0200 Subject: [PATCH 091/139] memory fragmentation reporting in INFO also added for Mac OS X --- src/config.h | 5 +++++ src/zmalloc.c | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/config.h b/src/config.h index acc95cf5..e2d84818 100644 --- a/src/config.h +++ b/src/config.h @@ -26,6 +26,11 @@ #define HAVE_PROCFS 1 #endif +/* test for task_info() */ +#if defined(__APPLE__) +#define HAVE_TASKINFO 1 +#endif + /* test for backtrace() */ #if defined(__APPLE__) || defined(__linux__) #define HAVE_BACKTRACE 1 diff --git a/src/zmalloc.c b/src/zmalloc.c index 81fc4c04..544155e7 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -32,10 +32,7 @@ #include #include #include -#include -#include -#include -#include + #include "config.h" #if defined(__sun) @@ -176,8 +173,14 @@ void zmalloc_enable_thread_safeness(void) { } /* Fragmentation = RSS / allocated-bytes */ + +#if defined(HAVE_PROCFS) +#include +#include +#include +#include + float zmalloc_get_fragmentation_ratio(void) { -#ifdef HAVE_PROCFS size_t allocated = zmalloc_used_memory(); int page = sysconf(_SC_PAGESIZE); size_t rss; @@ -208,7 +211,29 @@ float zmalloc_get_fragmentation_ratio(void) { rss = strtoll(p,NULL,10); rss *= page; return (float)rss/allocated; -#else - return 0; -#endif } +#elif defined(HAVE_TASKINFO) +#include +#include +#include +#include +#include +#include +#include + +float 
zmalloc_get_fragmentation_ratio(void) { + task_t task = MACH_PORT_NULL; + struct task_basic_info t_info; + mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; + + if (task_for_pid(current_task(), getpid(), &task) != KERN_SUCCESS) + return 0; + task_info(task, TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count); + + return (float)t_info.resident_size/zmalloc_used_memory(); +} +#else +float zmalloc_get_fragmentation_ratio(void) { + return 0; +} +#endif From 0537e7bf8042cf9954d3b0abab567edf3b5c0516 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 2 Sep 2010 12:38:34 +0200 Subject: [PATCH 092/139] Use specialized function to add multi bulk reply length --- src/multi.c | 2 +- src/networking.c | 4 ++++ src/redis.h | 1 + src/sort.c | 2 +- src/t_hash.c | 2 +- src/t_list.c | 6 +++--- src/t_set.c | 2 +- src/t_string.c | 2 +- src/t_zset.c | 3 +-- 9 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/multi.c b/src/multi.c index def1dd67..c85516df 100644 --- a/src/multi.c +++ b/src/multi.c @@ -107,7 +107,7 @@ void execCommand(redisClient *c) { unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */ orig_argv = c->argv; orig_argc = c->argc; - addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->mstate.count)); + addReplyMultiBulkLen(c,c->mstate.count); for (j = 0; j < c->mstate.count; j++) { c->argc = c->mstate.commands[j].argc; c->argv = c->mstate.commands[j].argv; diff --git a/src/networking.c b/src/networking.c index 971cbfc1..d2a4e231 100644 --- a/src/networking.c +++ b/src/networking.c @@ -200,6 +200,10 @@ void addReplyUlong(redisClient *c, unsigned long ul) { _addReplyLongLong(c,(long long)ul,':'); } +void addReplyMultiBulkLen(redisClient *c, long length) { + _addReplyLongLong(c,length,'*'); +} + void addReplyBulkLen(redisClient *c, robj *obj) { size_t len; diff --git a/src/redis.h b/src/redis.h index 752d56c3..6ee1d2e3 100644 --- a/src/redis.h +++ b/src/redis.h @@ -617,6 +617,7 @@ void addReplySds(redisClient *c, sds s); 
void addReplyDouble(redisClient *c, double d); void addReplyLongLong(redisClient *c, long long ll); void addReplyUlong(redisClient *c, unsigned long ul); +void addReplyMultiBulkLen(redisClient *c, long length); void *dupClientReplyValue(void *o); /* List data type */ diff --git a/src/sort.c b/src/sort.c index aa1ce929..f53ad486 100644 --- a/src/sort.c +++ b/src/sort.c @@ -307,7 +307,7 @@ void sortCommand(redisClient *c) { outputlen = getop ? getop*(end-start+1) : end-start+1; if (storekey == NULL) { /* STORE option not specified, sent the sorting result to client */ - addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen)); + addReplyMultiBulkLen(c,outputlen); for (j = start; j <= end; j++) { listNode *ln; listIter li; diff --git a/src/t_hash.c b/src/t_hash.c index c8be72f2..ad5d3e1e 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -315,7 +315,7 @@ void hmgetCommand(redisClient *c) { /* Note the check for o != NULL happens inside the loop. This is * done because objects that cannot be found are considered to be * an empty hash. The reply should then be a series of NULLs. 
*/ - addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-2)); + addReplyMultiBulkLen(c,c->argc-2); for (i = 2; i < c->argc; i++) { if (o != NULL && (value = hashTypeGet(o,c->argv[i])) != NULL) { addReplyBulk(c,value); diff --git a/src/t_list.c b/src/t_list.c index 2a981033..db9ca18e 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -494,7 +494,7 @@ void lrangeCommand(redisClient *c) { rangelen = (end-start)+1; /* Return the result in form of a multi-bulk reply */ - addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen)); + addReplyMultiBulkLen(c,rangelen); listTypeIterator *li = listTypeInitIterator(o,start,REDIS_TAIL); for (j = 0; j < rangelen; j++) { redisAssert(listTypeNext(li,&entry)); @@ -772,7 +772,7 @@ int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) { redisAssert(ln != NULL); receiver = ln->value; - addReplySds(receiver,sdsnew("*2\r\n")); + addReplyMultiBulkLen(receiver,2); addReplyBulk(receiver,key); addReplyBulk(receiver,ele); unblockClientWaitingData(receiver); @@ -811,7 +811,7 @@ void blockingPopGenericCommand(redisClient *c, int where) { * "real" command will add the last element (the value) * for us. If this souds like an hack to you it's just * because it is... 
*/ - addReplySds(c,sdsnew("*2\r\n")); + addReplyMultiBulkLen(c,2); addReplyBulk(c,argv[1]); popGenericCommand(c,where); diff --git a/src/t_set.c b/src/t_set.c index d6041e72..17cac934 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -469,7 +469,7 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj * /* Output the content of the resulting set, if not in STORE mode */ if (!dstkey) { - addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality)); + addReplyMultiBulkLen(c,cardinality); si = setTypeInitIterator(dstset); while((ele = setTypeNext(si)) != NULL) { addReplyBulk(c,ele); diff --git a/src/t_string.c b/src/t_string.c index 3b8a39bb..411687a5 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -79,7 +79,7 @@ void getsetCommand(redisClient *c) { void mgetCommand(redisClient *c) { int j; - addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1)); + addReplyMultiBulkLen(c,c->argc-1); for (j = 1; j < c->argc; j++) { robj *o = lookupKeyRead(c->db,c->argv[j]); if (o == NULL) { diff --git a/src/t_zset.c b/src/t_zset.c index d25b1a66..7de63158 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -782,8 +782,7 @@ void zrangeGenericCommand(redisClient *c, int reverse) { } /* Return the result in form of a multi-bulk reply */ - addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n", - withscores ? (rangelen*2) : rangelen)); + addReplyMultiBulkLen(c,withscores ? 
(rangelen*2) : rangelen); for (j = 0; j < rangelen; j++) { ele = ln->obj; addReplyBulk(c,ele); From 2403fc9fdec6113f10aa54770714e550eaab1b69 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 2 Sep 2010 14:17:53 +0200 Subject: [PATCH 093/139] Intialize bufpos in the fake client --- src/aof.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aof.c b/src/aof.c index 1ed2363a..58dd5538 100644 --- a/src/aof.c +++ b/src/aof.c @@ -189,6 +189,7 @@ struct redisClient *createFakeClient(void) { c->querybuf = sdsempty(); c->argc = 0; c->argv = NULL; + c->bufpos = 0; c->flags = 0; /* We set the fake client as a slave waiting for the synchronization * so that Redis will not try to send replies to this client. */ From cd76bb651ddc9168451e6729fdf7793eb628f57c Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 2 Sep 2010 14:19:15 +0200 Subject: [PATCH 094/139] Free the sds in addReplySds when it cannot be added to the reply --- src/networking.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index d2a4e231..89613ced 100644 --- a/src/networking.c +++ b/src/networking.c @@ -128,7 +128,11 @@ void addReply(redisClient *c, robj *obj) { } void addReplySds(redisClient *c, sds s) { - if (_ensureFileEvent(c) != REDIS_OK) return; + if (_ensureFileEvent(c) != REDIS_OK) { + /* The caller expects the sds to be free'd. 
*/ + sdsfree(s); + return; + } if (sdslen(s) < REDIS_REPLY_CHUNK_THRESHOLD) { _addReplyStringToBuffer(c,s,sdslen(s)); sdsfree(s); From b70d355521fd02737c4de2a1583025699f1554f8 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 2 Sep 2010 14:30:56 +0200 Subject: [PATCH 095/139] Use existing reply functions where possible --- src/db.c | 6 ++---- src/sort.c | 2 +- src/t_hash.c | 2 +- src/t_list.c | 6 +++--- src/t_set.c | 2 +- src/t_string.c | 2 +- src/t_zset.c | 2 +- 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/db.c b/src/db.c index 8c6c6bc8..4d119cf2 100644 --- a/src/db.c +++ b/src/db.c @@ -245,13 +245,11 @@ void keysCommand(redisClient *c) { } void dbsizeCommand(redisClient *c) { - addReplySds(c, - sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict))); + addReplyLongLong(c,dictSize(c->db->dict)); } void lastsaveCommand(redisClient *c) { - addReplySds(c, - sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave)); + addReplyLongLong(c,server.lastsave); } void typeCommand(redisClient *c) { diff --git a/src/sort.c b/src/sort.c index f53ad486..79f79010 100644 --- a/src/sort.c +++ b/src/sort.c @@ -369,7 +369,7 @@ void sortCommand(redisClient *c) { * replaced. 
*/ server.dirty += 1+outputlen; touchWatchedKey(c->db,storekey); - addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen)); + addReplyLongLong(c,outputlen); } /* Cleanup */ diff --git a/src/t_hash.c b/src/t_hash.c index ad5d3e1e..5745f88c 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -346,7 +346,7 @@ void hlenCommand(redisClient *c) { if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL || checkType(c,o,REDIS_HASH)) return; - addReplyUlong(c,hashTypeLength(o)); + addReplyLongLong(c,hashTypeLength(o)); } void genericHgetallCommand(redisClient *c, int flags) { diff --git a/src/t_list.c b/src/t_list.c index db9ca18e..4d948294 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -342,7 +342,7 @@ void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) { server.dirty++; } - addReplyUlong(c,listTypeLength(subject)); + addReplyLongLong(c,listTypeLength(subject)); } void lpushxCommand(redisClient *c) { @@ -366,7 +366,7 @@ void linsertCommand(redisClient *c) { void llenCommand(redisClient *c) { robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero); if (o == NULL || checkType(c,o,REDIS_LIST)) return; - addReplyUlong(c,listTypeLength(o)); + addReplyLongLong(c,listTypeLength(o)); } void lindexCommand(redisClient *c) { @@ -594,7 +594,7 @@ void lremCommand(redisClient *c) { decrRefCount(obj); if (listTypeLength(subject) == 0) dbDelete(c->db,c->argv[1]); - addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed)); + addReplyLongLong(c,removed); if (removed) touchWatchedKey(c->db,c->argv[1]); } diff --git a/src/t_set.c b/src/t_set.c index 17cac934..e2ac5ae5 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -276,7 +276,7 @@ void scardCommand(redisClient *c) { if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL || checkType(c,o,REDIS_SET)) return; - addReplyUlong(c,setTypeSize(o)); + addReplyLongLong(c,setTypeSize(o)); } void spopCommand(redisClient *c) { diff --git a/src/t_string.c b/src/t_string.c index 411687a5..276f4dab 100644 --- 
a/src/t_string.c +++ b/src/t_string.c @@ -211,7 +211,7 @@ void appendCommand(redisClient *c) { } touchWatchedKey(c->db,c->argv[1]); server.dirty++; - addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen)); + addReplyLongLong(c,totlen); } void substrCommand(redisClient *c) { diff --git a/src/t_zset.c b/src/t_zset.c index 7de63158..6a332c6a 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -930,7 +930,7 @@ void zcardCommand(redisClient *c) { checkType(c,o,REDIS_ZSET)) return; zs = o->ptr; - addReplyUlong(c,zs->zsl->length); + addReplyLongLong(c,zs->zsl->length); } void zscoreCommand(redisClient *c) { From 4a7893ca9ce334f2a144faa96ef02113bef4b2b2 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 2 Sep 2010 14:31:25 +0200 Subject: [PATCH 096/139] Removed unneeded function --- src/networking.c | 4 ---- src/redis.h | 1 - 2 files changed, 5 deletions(-) diff --git a/src/networking.c b/src/networking.c index 89613ced..f37ecac1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -200,10 +200,6 @@ void addReplyLongLong(redisClient *c, long long ll) { _addReplyLongLong(c,ll,':'); } -void addReplyUlong(redisClient *c, unsigned long ul) { - _addReplyLongLong(c,(long long)ul,':'); -} - void addReplyMultiBulkLen(redisClient *c, long length) { _addReplyLongLong(c,length,'*'); } diff --git a/src/redis.h b/src/redis.h index 6ee1d2e3..ea05fcd0 100644 --- a/src/redis.h +++ b/src/redis.h @@ -616,7 +616,6 @@ void addReply(redisClient *c, robj *obj); void addReplySds(redisClient *c, sds s); void addReplyDouble(redisClient *c, double d); void addReplyLongLong(redisClient *c, long long ll); -void addReplyUlong(redisClient *c, unsigned long ul); void addReplyMultiBulkLen(redisClient *c, long length); void *dupClientReplyValue(void *o); From 36c19d03e08b94ea1bc246918cbd71ea810d38aa Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 2 Sep 2010 19:18:55 +0200 Subject: [PATCH 097/139] Changed reply buildup internals --- src/networking.c | 151 
++++++++++++++++++++++++++++++----------------- src/object.c | 1 - src/redis.h | 11 +--- 3 files changed, 99 insertions(+), 64 deletions(-) diff --git a/src/networking.c b/src/networking.c index f37ecac1..dd005335 100644 --- a/src/networking.c +++ b/src/networking.c @@ -13,13 +13,9 @@ int listMatchObjects(void *a, void *b) { redisClient *createClient(int fd) { redisClient *c; - /* Make sure to allocate a multiple of the page size to prevent wasting - * memory. A page size of 4096 is assumed here. We need to compensate - * for the zmalloc overhead of sizeof(size_t) bytes. */ - size_t size = 8192-sizeof(size_t); - redisAssert(size > sizeof(redisClient)); - c = zmalloc(size); - c->buflen = size-sizeof(redisClient); + /* Allocate more space to hold a static write buffer. */ + c = zmalloc(sizeof(redisClient)+REDIS_REPLY_CHUNK_BYTES); + c->buflen = REDIS_REPLY_CHUNK_BYTES; c->bufpos = 0; anetNonBlock(NULL,fd); @@ -71,40 +67,95 @@ int _ensureFileEvent(redisClient *c) { return REDIS_OK; } -void _addReplyObjectToList(redisClient *c, robj *obj) { - redisAssert(obj->type == REDIS_STRING && - obj->encoding == REDIS_ENCODING_RAW); - listAddNodeTail(c->reply,obj); +/* Create a duplicate of the last object in the reply list when + * it is not exclusively owned by the reply list. 
*/ +robj *dupLastObjectIfNeeded(list *reply) { + robj *new, *cur; + listNode *ln; + redisAssert(listLength(reply) > 0); + ln = listLast(reply); + cur = listNodeValue(ln); + if (cur->refcount > 1) { + new = dupStringObject(cur); + decrRefCount(cur); + listNodeValue(ln) = new; + } + return listNodeValue(ln); } -void _ensureBufferInReplyList(redisClient *c) { - sds buffer = sdsnewlen(NULL,REDIS_REPLY_CHUNK_SIZE); - sdsupdatelen(buffer); /* sdsnewlen expects non-empty string */ - listAddNodeTail(c->reply,createObject(REDIS_REPLY_NODE,buffer)); +int _addReplyToBuffer(redisClient *c, char *s, size_t len) { + size_t available = c->buflen-c->bufpos; + + /* If there already are entries in the reply list, we cannot + * add anything more to the static buffer. */ + if (listLength(c->reply) > 0) return REDIS_ERR; + + /* Check that the buffer has enough space available for this string. */ + if (len > available) return REDIS_ERR; + + memcpy(c->buf+c->bufpos,s,len); + c->bufpos+=len; + return REDIS_OK; } -void _addReplyStringToBuffer(redisClient *c, char *s, size_t len) { - size_t available = 0; - redisAssert(len < REDIS_REPLY_CHUNK_THRESHOLD); - if (listLength(c->reply) > 0) { - robj *o = listNodeValue(listLast(c->reply)); - - /* Make sure to append to a reply node with enough bytes available. */ - if (o->type == REDIS_REPLY_NODE) available = sdsavail(o->ptr); - if (o->type != REDIS_REPLY_NODE || len > available) { - _ensureBufferInReplyList(c); - _addReplyStringToBuffer(c,s,len); - } else { - o->ptr = sdscatlen(o->ptr,s,len); - } +void _addReplyObjectToList(redisClient *c, robj *o) { + robj *tail; + if (listLength(c->reply) == 0) { + incrRefCount(o); + listAddNodeTail(c->reply,o); } else { - available = c->buflen-c->bufpos; - if (len > available) { - _ensureBufferInReplyList(c); - _addReplyStringToBuffer(c,s,len); + tail = listNodeValue(listLast(c->reply)); + + /* Append to this object when possible. 
*/ + if (tail->ptr != NULL && + sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES) + { + tail = dupLastObjectIfNeeded(c->reply); + tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr)); } else { - memcpy(c->buf+c->bufpos,s,len); - c->bufpos += len; + incrRefCount(o); + listAddNodeTail(c->reply,o); + } + } +} + +/* This method takes responsibility over the sds. When it is no longer + * needed it will be free'd, otherwise it ends up in a robj. */ +void _addReplySdsToList(redisClient *c, sds s) { + robj *tail; + if (listLength(c->reply) == 0) { + listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); + } else { + tail = listNodeValue(listLast(c->reply)); + + /* Append to this object when possible. */ + if (tail->ptr != NULL && + sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES) + { + tail = dupLastObjectIfNeeded(c->reply); + tail->ptr = sdscatlen(tail->ptr,s,sdslen(s)); + sdsfree(s); + } else { + listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); + } + } +} + +void _addReplyStringToList(redisClient *c, char *s, size_t len) { + robj *tail; + if (listLength(c->reply) == 0) { + listAddNodeTail(c->reply,createStringObject(s,len)); + } else { + tail = listNodeValue(listLast(c->reply)); + + /* Append to this object when possible. */ + if (tail->ptr != NULL && + sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES) + { + tail = dupLastObjectIfNeeded(c->reply); + tail->ptr = sdscatlen(tail->ptr,s,len); + } else { + listAddNodeTail(c->reply,createStringObject(s,len)); } } } @@ -118,13 +169,9 @@ void addReply(redisClient *c, robj *obj) { /* This increments the refcount. 
*/ obj = getDecodedObject(obj); } - - if (sdslen(obj->ptr) < REDIS_REPLY_CHUNK_THRESHOLD) { - _addReplyStringToBuffer(c,obj->ptr,sdslen(obj->ptr)); - decrRefCount(obj); - } else { + if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) _addReplyObjectToList(c,obj); - } + decrRefCount(obj); } void addReplySds(redisClient *c, sds s) { @@ -133,28 +180,25 @@ void addReplySds(redisClient *c, sds s) { sdsfree(s); return; } - if (sdslen(s) < REDIS_REPLY_CHUNK_THRESHOLD) { - _addReplyStringToBuffer(c,s,sdslen(s)); + if (_addReplyToBuffer(c,s,sdslen(s)) == REDIS_OK) { sdsfree(s); } else { - _addReplyObjectToList(c,createObject(REDIS_STRING,s)); + /* This method free's the sds when it is no longer needed. */ + _addReplySdsToList(c,s); } } void addReplyString(redisClient *c, char *s, size_t len) { if (_ensureFileEvent(c) != REDIS_OK) return; - if (len < REDIS_REPLY_CHUNK_THRESHOLD) { - _addReplyStringToBuffer(c,s,len); - } else { - _addReplyObjectToList(c,createStringObject(s,len)); - } + if (_addReplyToBuffer(c,s,len) != REDIS_OK) + _addReplyStringToList(c,s,len); } /* Adds an empty object to the reply list that will contain the multi bulk * length, which is not known when this function is called. */ void *addDeferredMultiBulkLength(redisClient *c) { if (_ensureFileEvent(c) != REDIS_OK) return NULL; - _addReplyObjectToList(c,createObject(REDIS_STRING,NULL)); + listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL)); return listLast(c->reply); } @@ -170,9 +214,10 @@ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length); if (ln->next != NULL) { next = listNodeValue(ln->next); - /* Only glue when the next node is a reply chunk. 
*/ - if (next->type == REDIS_REPLY_NODE) { - len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr)); + + /* Only glue when the next node is an sds */ + if (next->ptr != NULL) { + len->ptr = sdscat(len->ptr,next->ptr); listDelNode(c->reply,ln->next); } } diff --git a/src/object.c b/src/object.c index 5e8dbfa2..92af1d6a 100644 --- a/src/object.c +++ b/src/object.c @@ -196,7 +196,6 @@ void decrRefCount(void *obj) { case REDIS_SET: freeSetObject(o); break; case REDIS_ZSET: freeZsetObject(o); break; case REDIS_HASH: freeHashObject(o); break; - case REDIS_REPLY_NODE: freeStringObject(o); break; default: redisPanic("Unknown object type"); break; } o->ptr = NULL; /* defensive programming. We'll see NULL in traces. */ diff --git a/src/redis.h b/src/redis.h index ea05fcd0..328df08d 100644 --- a/src/redis.h +++ b/src/redis.h @@ -47,15 +47,7 @@ #define REDIS_MAX_WRITE_PER_EVENT (1024*64) #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ #define REDIS_SHARED_INTEGERS 10000 - -/* Size of a reply chunk, configured to exactly allocate 4k bytes */ -#define REDIS_REPLY_CHUNK_BYTES (4*1024) -#define REDIS_REPLY_CHUNK_SIZE (REDIS_REPLY_CHUNK_BYTES-sizeof(struct sdshdr)-1-sizeof(size_t)) -/* It doesn't make sense to memcpy objects to a chunk when the net result is - * not being able to glue other objects. We want to make sure it can be glued - * to at least a bulk length or \r\n, so set the threshold to be a couple - * of bytes less than the size of the buffer. */ -#define REDIS_REPLY_CHUNK_THRESHOLD (REDIS_REPLY_CHUNK_SIZE-16) +#define REDIS_REPLY_CHUNK_BYTES (5*1500) /* 5 TCP packets with default MTU */ /* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */ #define REDIS_WRITEV_THRESHOLD 3 @@ -81,7 +73,6 @@ #define REDIS_SET 2 #define REDIS_ZSET 3 #define REDIS_HASH 4 -#define REDIS_REPLY_NODE 5 #define REDIS_VMPOINTER 8 /* Objects encoding. 
Some kind of objects like Strings and Hashes can be From 60361e5aac5b06ab06f4a63439ce84cd58c87f3d Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 2 Sep 2010 19:35:07 +0200 Subject: [PATCH 098/139] Add sds function that can be called with va_list --- src/sds.c | 19 +++++++++++++------ src/sds.h | 2 ++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/sds.c b/src/sds.c index a0ebb059..2f3ffedc 100644 --- a/src/sds.c +++ b/src/sds.c @@ -33,7 +33,6 @@ #include "sds.h" #include #include -#include #include #include #include "zmalloc.h" @@ -156,8 +155,8 @@ sds sdscpy(sds s, char *t) { return sdscpylen(s, t, strlen(t)); } -sds sdscatprintf(sds s, const char *fmt, ...) { - va_list ap; +sds sdscatvprintf(sds s, const char *fmt, va_list ap) { + va_list cpy; char *buf, *t; size_t buflen = 16; @@ -169,9 +168,8 @@ sds sdscatprintf(sds s, const char *fmt, ...) { if (buf == NULL) return NULL; #endif buf[buflen-2] = '\0'; - va_start(ap, fmt); - vsnprintf(buf, buflen, fmt, ap); - va_end(ap); + va_copy(cpy,ap); + vsnprintf(buf, buflen, fmt, cpy); if (buf[buflen-2] != '\0') { zfree(buf); buflen *= 2; @@ -184,6 +182,15 @@ sds sdscatprintf(sds s, const char *fmt, ...) { return t; } +sds sdscatprintf(sds s, const char *fmt, ...) { + va_list ap; + char *t; + va_start(ap, fmt); + t = sdscatvprintf(s,fmt,ap); + va_end(ap); + return t; +} + sds sdstrim(sds s, const char *cset) { struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr))); char *start, *end, *sp, *ep; diff --git a/src/sds.h b/src/sds.h index a0e224f5..ae0f84fb 100644 --- a/src/sds.h +++ b/src/sds.h @@ -32,6 +32,7 @@ #define __SDS_H #include +#include typedef char *sds; @@ -53,6 +54,7 @@ sds sdscat(sds s, char *t); sds sdscpylen(sds s, char *t, size_t len); sds sdscpy(sds s, char *t); +sds sdscatvprintf(sds s, const char *fmt, va_list ap); #ifdef __GNUC__ sds sdscatprintf(sds s, const char *fmt, ...) 
__attribute__((format(printf, 2, 3))); From 3ab203762f28ffec4036dc4f5a188d637cf78ff1 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 2 Sep 2010 19:52:24 +0200 Subject: [PATCH 099/139] Use specialized function to add status and error replies --- src/aof.c | 5 ++--- src/config.c | 23 ++++++++++------------- src/db.c | 28 +++++++++++++--------------- src/debug.c | 33 ++++++++++++++++----------------- src/multi.c | 8 ++++---- src/networking.c | 38 ++++++++++++++++++++++++++++++++++++++ src/object.c | 12 ++++++------ src/redis.c | 25 +++++++++++-------------- src/redis.h | 12 ++++++++++++ src/replication.c | 6 +++--- src/t_hash.c | 2 +- src/t_string.c | 4 ++-- src/t_zset.c | 9 ++++----- 13 files changed, 122 insertions(+), 83 deletions(-) diff --git a/src/aof.c b/src/aof.c index 58dd5538..b639eb52 100644 --- a/src/aof.c +++ b/src/aof.c @@ -632,12 +632,11 @@ int rewriteAppendOnlyFileBackground(void) { void bgrewriteaofCommand(redisClient *c) { if (server.bgrewritechildpid != -1) { - addReplySds(c,sdsnew("-ERR background append only file rewriting already in progress\r\n")); + addReplyError(c,"Background append only file rewriting already in progress"); return; } if (rewriteAppendOnlyFileBackground() == REDIS_OK) { - char *status = "+Background append only file rewriting started\r\n"; - addReplySds(c,sdsnew(status)); + addReplyStatus(c,"Background append only file rewriting started"); } else { addReply(c,shared.err); } diff --git a/src/config.c b/src/config.c index 5c449886..8a5ad6c2 100644 --- a/src/config.c +++ b/src/config.c @@ -270,8 +270,8 @@ void configSetCommand(redisClient *c) { stopAppendOnly(); } else { if (startAppendOnly() == REDIS_ERR) { - addReplySds(c,sdscatprintf(sdsempty(), - "-ERR Unable to turn on AOF. Check server logs.\r\n")); + addReplyError(c, + "Unable to turn on AOF. 
Check server logs."); decrRefCount(o); return; } @@ -312,9 +312,8 @@ void configSetCommand(redisClient *c) { } sdsfreesplitres(v,vlen); } else { - addReplySds(c,sdscatprintf(sdsempty(), - "-ERR not supported CONFIG parameter %s\r\n", - (char*)c->argv[2]->ptr)); + addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s", + (char*)c->argv[2]->ptr); decrRefCount(o); return; } @@ -323,10 +322,9 @@ void configSetCommand(redisClient *c) { return; badfmt: /* Bad format errors */ - addReplySds(c,sdscatprintf(sdsempty(), - "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n", + addReplyErrorFormat(c,"Invalid argument '%s' for CONFIG SET '%s'", (char*)o->ptr, - (char*)c->argv[2]->ptr)); + (char*)c->argv[2]->ptr); decrRefCount(o); } @@ -425,13 +423,12 @@ void configCommand(redisClient *c) { server.stat_starttime = time(NULL); addReply(c,shared.ok); } else { - addReplySds(c,sdscatprintf(sdsempty(), - "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n")); + addReplyError(c, + "CONFIG subcommand must be one of GET, SET, RESETSTAT"); } return; badarity: - addReplySds(c,sdscatprintf(sdsempty(), - "-ERR Wrong number of arguments for CONFIG %s\r\n", - (char*) c->argv[1]->ptr)); + addReplyErrorFormat(c,"Wrong number of arguments for CONFIG %s", + (char*) c->argv[1]->ptr); } diff --git a/src/db.c b/src/db.c index 4d119cf2..4f1572a6 100644 --- a/src/db.c +++ b/src/db.c @@ -199,7 +199,7 @@ void selectCommand(redisClient *c) { int id = atoi(c->argv[1]->ptr); if (selectDb(c,id) == REDIS_ERR) { - addReplySds(c,sdsnew("-ERR invalid DB index\r\n")); + addReplyError(c,"invalid DB index"); } else { addReply(c,shared.ok); } @@ -258,24 +258,23 @@ void typeCommand(redisClient *c) { o = lookupKeyRead(c->db,c->argv[1]); if (o == NULL) { - type = "+none"; + type = "none"; } else { switch(o->type) { - case REDIS_STRING: type = "+string"; break; - case REDIS_LIST: type = "+list"; break; - case REDIS_SET: type = "+set"; break; - case REDIS_ZSET: type = "+zset"; break; - case REDIS_HASH: 
type = "+hash"; break; - default: type = "+unknown"; break; + case REDIS_STRING: type = "string"; break; + case REDIS_LIST: type = "list"; break; + case REDIS_SET: type = "set"; break; + case REDIS_ZSET: type = "zset"; break; + case REDIS_HASH: type = "hash"; break; + default: type = "unknown"; break; } } - addReplySds(c,sdsnew(type)); - addReply(c,shared.crlf); + addReplyStatus(c,type); } void saveCommand(redisClient *c) { if (server.bgsavechildpid != -1) { - addReplySds(c,sdsnew("-ERR background save in progress\r\n")); + addReplyError(c,"Background save already in progress"); return; } if (rdbSave(server.dbfilename) == REDIS_OK) { @@ -287,12 +286,11 @@ void saveCommand(redisClient *c) { void bgsaveCommand(redisClient *c) { if (server.bgsavechildpid != -1) { - addReplySds(c,sdsnew("-ERR background save already in progress\r\n")); + addReplyError(c,"Background save already in progress"); return; } if (rdbSaveBackground(server.dbfilename) == REDIS_OK) { - char *status = "+Background saving started\r\n"; - addReplySds(c,sdsnew(status)); + addReplyStatus(c,"Background saving started"); } else { addReply(c,shared.err); } @@ -301,7 +299,7 @@ void bgsaveCommand(redisClient *c) { void shutdownCommand(redisClient *c) { if (prepareForShutdown() == REDIS_OK) exit(0); - addReplySds(c, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n")); + addReplyError(c,"Errors trying to SHUTDOWN. 
Check logs."); } void renameGenericCommand(redisClient *c, int nx) { diff --git a/src/debug.c b/src/debug.c index 76d18b21..2f7ab58f 100644 --- a/src/debug.c +++ b/src/debug.c @@ -211,18 +211,18 @@ void debugCommand(redisClient *c) { char *strenc; strenc = strEncoding(val->encoding); - addReplySds(c,sdscatprintf(sdsempty(), - "+Value at:%p refcount:%d " - "encoding:%s serializedlength:%lld\r\n", + addReplyStatusFormat(c, + "Value at:%p refcount:%d " + "encoding:%s serializedlength:%lld", (void*)val, val->refcount, - strenc, (long long) rdbSavedObjectLen(val,NULL))); + strenc, (long long) rdbSavedObjectLen(val,NULL)); } else { vmpointer *vp = (vmpointer*) val; - addReplySds(c,sdscatprintf(sdsempty(), - "+Value swapped at: page %llu " - "using %llu pages\r\n", + addReplyStatusFormat(c, + "Value swapped at: page %llu " + "using %llu pages", (unsigned long long) vp->page, - (unsigned long long) vp->usedpages)); + (unsigned long long) vp->usedpages); } } else if (!strcasecmp(c->argv[1]->ptr,"swapin") && c->argc == 3) { lookupKeyRead(c->db,c->argv[2]); @@ -233,7 +233,7 @@ void debugCommand(redisClient *c) { vmpointer *vp; if (!server.vm_enabled) { - addReplySds(c,sdsnew("-ERR Virtual Memory is disabled\r\n")); + addReplyError(c,"Virtual Memory is disabled"); return; } if (!de) { @@ -243,9 +243,9 @@ void debugCommand(redisClient *c) { val = dictGetEntryVal(de); /* Swap it */ if (val->storage != REDIS_VM_MEMORY) { - addReplySds(c,sdsnew("-ERR This key is not in memory\r\n")); + addReplyError(c,"This key is not in memory"); } else if (val->refcount != 1) { - addReplySds(c,sdsnew("-ERR Object is shared\r\n")); + addReplyError(c,"Object is shared"); } else if ((vp = vmSwapObjectBlocking(val)) != NULL) { dictGetEntryVal(de) = vp; addReply(c,shared.ok); @@ -274,18 +274,17 @@ void debugCommand(redisClient *c) { addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr,"digest") && c->argc == 2) { unsigned char digest[20]; - sds d = sdsnew("+"); + sds d = sdsempty(); int j; 
computeDatasetDigest(digest); for (j = 0; j < 20; j++) d = sdscatprintf(d, "%02x",digest[j]); - - d = sdscatlen(d,"\r\n",2); - addReplySds(c,d); + addReplyStatus(c,d); + sdsfree(d); } else { - addReplySds(c,sdsnew( - "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT |SWAPIN |SWAPOUT |RELOAD]\r\n")); + addReplyError(c, + "Syntax error, try DEBUG [SEGFAULT|OBJECT |SWAPIN |SWAPOUT |RELOAD]"); } } diff --git a/src/multi.c b/src/multi.c index c85516df..47615eb0 100644 --- a/src/multi.c +++ b/src/multi.c @@ -42,7 +42,7 @@ void queueMultiCommand(redisClient *c, struct redisCommand *cmd) { void multiCommand(redisClient *c) { if (c->flags & REDIS_MULTI) { - addReplySds(c,sdsnew("-ERR MULTI calls can not be nested\r\n")); + addReplyError(c,"MULTI calls can not be nested"); return; } c->flags |= REDIS_MULTI; @@ -51,7 +51,7 @@ void multiCommand(redisClient *c) { void discardCommand(redisClient *c) { if (!(c->flags & REDIS_MULTI)) { - addReplySds(c,sdsnew("-ERR DISCARD without MULTI\r\n")); + addReplyError(c,"DISCARD without MULTI"); return; } @@ -82,7 +82,7 @@ void execCommand(redisClient *c) { int orig_argc; if (!(c->flags & REDIS_MULTI)) { - addReplySds(c,sdsnew("-ERR EXEC without MULTI\r\n")); + addReplyError(c,"EXEC without MULTI"); return; } @@ -251,7 +251,7 @@ void watchCommand(redisClient *c) { int j; if (c->flags & REDIS_MULTI) { - addReplySds(c,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n")); + addReplyError(c,"WATCH inside MULTI is not allowed"); return; } for (j = 1; j < c->argc; j++) diff --git a/src/networking.c b/src/networking.c index dd005335..d62456a3 100644 --- a/src/networking.c +++ b/src/networking.c @@ -194,6 +194,44 @@ void addReplyString(redisClient *c, char *s, size_t len) { _addReplyStringToList(c,s,len); } +void _addReplyError(redisClient *c, char *s, size_t len) { + addReplyString(c,"-ERR ",5); + addReplyString(c,s,len); + addReplyString(c,"\r\n",2); +} + +void addReplyError(redisClient *c, char *err) { + _addReplyError(c,err,strlen(err)); +} + 
+void addReplyErrorFormat(redisClient *c, const char *fmt, ...) { + va_list ap; + va_start(ap,fmt); + sds s = sdscatvprintf(sdsempty(),fmt,ap); + va_end(ap); + _addReplyError(c,s,sdslen(s)); + sdsfree(s); +} + +void _addReplyStatus(redisClient *c, char *s, size_t len) { + addReplyString(c,"+",1); + addReplyString(c,s,len); + addReplyString(c,"\r\n",2); +} + +void addReplyStatus(redisClient *c, char *status) { + _addReplyStatus(c,status,strlen(status)); +} + +void addReplyStatusFormat(redisClient *c, const char *fmt, ...) { + va_list ap; + va_start(ap,fmt); + sds s = sdscatvprintf(sdsempty(),fmt,ap); + va_end(ap); + _addReplyStatus(c,s,sdslen(s)); + sdsfree(s); +} + /* Adds an empty object to the reply list that will contain the multi bulk * length, which is not known when this function is called. */ void *addDeferredMultiBulkLength(redisClient *c) { diff --git a/src/object.c b/src/object.c index 92af1d6a..c1a08245 100644 --- a/src/object.c +++ b/src/object.c @@ -354,9 +354,9 @@ int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const ch double value; if (getDoubleFromObject(o, &value) != REDIS_OK) { if (msg != NULL) { - addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg)); + addReplyError(c,(char*)msg); } else { - addReplySds(c, sdsnew("-ERR value is not a double\r\n")); + addReplyError(c,"value is not a double"); } return REDIS_ERR; } @@ -393,9 +393,9 @@ int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, con long long value; if (getLongLongFromObject(o, &value) != REDIS_OK) { if (msg != NULL) { - addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg)); + addReplyError(c,(char*)msg); } else { - addReplySds(c, sdsnew("-ERR value is not an integer or out of range\r\n")); + addReplyError(c,"value is not an integer or out of range"); } return REDIS_ERR; } @@ -410,9 +410,9 @@ int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char * if (getLongLongFromObjectOrReply(c, o, &value, msg) != 
REDIS_OK) return REDIS_ERR; if (value < LONG_MIN || value > LONG_MAX) { if (msg != NULL) { - addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg)); + addReplyError(c,(char*)msg); } else { - addReplySds(c, sdsnew("-ERR value is out of range\r\n")); + addReplyError(c,"value is out of range"); } return REDIS_ERR; } diff --git a/src/redis.c b/src/redis.c index 77e67c58..5af9b235 100644 --- a/src/redis.c +++ b/src/redis.c @@ -909,7 +909,7 @@ int processCommand(redisClient *c) { } else if (c->multibulk) { if (c->bulklen == -1) { if (((char*)c->argv[0]->ptr)[0] != '$') { - addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n")); + addReplyError(c,"multi bulk protocol error"); resetClient(c); return 1; } else { @@ -922,7 +922,7 @@ int processCommand(redisClient *c) { bulklen < 0 || bulklen > 1024*1024*1024) { c->argc--; - addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n")); + addReplyError(c,"invalid bulk write count"); resetClient(c); return 1; } @@ -975,17 +975,14 @@ int processCommand(redisClient *c) { * such wrong arity, bad command name and so forth. 
*/ cmd = lookupCommand(c->argv[0]->ptr); if (!cmd) { - addReplySds(c, - sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", - (char*)c->argv[0]->ptr)); + addReplyErrorFormat(c,"unknown command '%s'", + (char*)c->argv[0]->ptr); resetClient(c); return 1; } else if ((cmd->arity > 0 && cmd->arity != c->argc) || (c->argc < -cmd->arity)) { - addReplySds(c, - sdscatprintf(sdsempty(), - "-ERR wrong number of arguments for '%s' command\r\n", - cmd->name)); + addReplyErrorFormat(c,"wrong number of arguments for '%s' command", + cmd->name); resetClient(c); return 1; } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) { @@ -999,7 +996,7 @@ int processCommand(redisClient *c) { bulklen < 0 || bulklen > 1024*1024*1024) { c->argc--; - addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n")); + addReplyError(c,"invalid bulk write count"); resetClient(c); return 1; } @@ -1026,7 +1023,7 @@ int processCommand(redisClient *c) { /* Check if the user is authenticated */ if (server.requirepass && !c->authenticated && cmd->proc != authCommand) { - addReplySds(c,sdsnew("-ERR operation not permitted\r\n")); + addReplyError(c,"operation not permitted"); resetClient(c); return 1; } @@ -1035,7 +1032,7 @@ int processCommand(redisClient *c) { if (server.maxmemory && (cmd->flags & REDIS_CMD_DENYOOM) && zmalloc_used_memory() > server.maxmemory) { - addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); + addReplyError(c,"command not allowed when used memory > 'maxmemory'"); resetClient(c); return 1; } @@ -1045,7 +1042,7 @@ int processCommand(redisClient *c) { && cmd->proc != subscribeCommand && cmd->proc != unsubscribeCommand && cmd->proc != psubscribeCommand && cmd->proc != punsubscribeCommand) { - addReplySds(c,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n")); + addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context"); resetClient(c); return 1; } @@ -1109,7 +1106,7 @@ void 
authCommand(redisClient *c) { addReply(c,shared.ok); } else { c->authenticated = 0; - addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); + addReplyError(c,"invalid password"); } } diff --git a/src/redis.h b/src/redis.h index 328df08d..1ef56288 100644 --- a/src/redis.h +++ b/src/redis.h @@ -605,11 +605,23 @@ void addReplyBulkCString(redisClient *c, char *s); void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); void addReply(redisClient *c, robj *obj); void addReplySds(redisClient *c, sds s); +void addReplyError(redisClient *c, char *err); +void addReplyStatus(redisClient *c, char *status); void addReplyDouble(redisClient *c, double d); void addReplyLongLong(redisClient *c, long long ll); void addReplyMultiBulkLen(redisClient *c, long length); void *dupClientReplyValue(void *o); +#ifdef __GNUC__ +void addReplyErrorFormat(redisClient *c, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); +void addReplyStatusFormat(redisClient *c, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); +#else +void addReplyErrorFormat(redisClient *c, const char *fmt, ...); +void addReplyStatusFormat(redisClient *c, const char *fmt, ...); +#endif + /* List data type */ void listTypeTryConversion(robj *subject, robj *value); void listTypePush(robj *subject, robj *value, int where); diff --git a/src/replication.c b/src/replication.c index c2846088..8c629006 100644 --- a/src/replication.c +++ b/src/replication.c @@ -179,7 +179,7 @@ void syncCommand(redisClient *c) { /* Refuse SYNC requests if we are a slave but the link with our master * is not ok... 
*/ if (server.masterhost && server.replstate != REDIS_REPL_CONNECTED) { - addReplySds(c,sdsnew("-ERR Can't SYNC while not connected with my master\r\n")); + addReplyError(c,"Can't SYNC while not connected with my master"); return; } @@ -188,7 +188,7 @@ void syncCommand(redisClient *c) { * buffer registering the differences between the BGSAVE and the current * dataset, so that we can copy to other slaves if needed. */ if (listLength(c->reply) != 0) { - addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n")); + addReplyError(c,"SYNC is invalid with pending input"); return; } @@ -226,7 +226,7 @@ void syncCommand(redisClient *c) { redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC"); if (rdbSaveBackground(server.dbfilename) != REDIS_OK) { redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE"); - addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n")); + addReplyError(c,"Unable to perform background save"); return; } c->replstate = REDIS_REPL_WAIT_BGSAVE_END; diff --git a/src/t_hash.c b/src/t_hash.c index 5745f88c..5cef1cab 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -249,7 +249,7 @@ void hmsetCommand(redisClient *c) { robj *o; if ((c->argc % 2) == 1) { - addReplySds(c,sdsnew("-ERR wrong number of arguments for HMSET\r\n")); + addReplyError(c,"wrong number of arguments for HMSET"); return; } diff --git a/src/t_string.c b/src/t_string.c index 276f4dab..509c630a 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -12,7 +12,7 @@ void setGenericCommand(redisClient *c, int nx, robj *key, robj *val, robj *expir if (getLongFromObjectOrReply(c, expire, &seconds, NULL) != REDIS_OK) return; if (seconds <= 0) { - addReplySds(c,sdsnew("-ERR invalid expire time in SETEX\r\n")); + addReplyError(c,"invalid expire time in SETEX"); return; } } @@ -98,7 +98,7 @@ void msetGenericCommand(redisClient *c, int nx) { int j, busykeys = 0; if ((c->argc % 2) == 0) { - addReplySds(c,sdsnew("-ERR wrong number of arguments for MSET\r\n")); + addReplyError(c,"wrong number 
of arguments for MSET"); return; } /* Handle the NX flag. The MSETNX semantic is to return zero and don't diff --git a/src/t_zset.c b/src/t_zset.c index 6a332c6a..d944e923 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -355,8 +355,7 @@ void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, i *score = scoreval; } if (isnan(*score)) { - addReplySds(c, - sdsnew("-ERR resulting score is not a number (NaN)\r\n")); + addReplyError(c,"resulting score is not a number (NaN)"); zfree(score); /* Note that we don't need to check if the zset may be empty and * should be removed here, as we can only obtain Nan as score if @@ -561,7 +560,8 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { /* expect setnum input keys to be given */ setnum = atoi(c->argv[2]->ptr); if (setnum < 1) { - addReplySds(c,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n")); + addReplyError(c, + "at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE"); return; } @@ -839,8 +839,7 @@ void genericZrangebyscoreCommand(redisClient *c, int justcount) { if (c->argc != (4 + withscores) && c->argc != (7 + withscores)) badsyntax = 1; if (badsyntax) { - addReplySds(c, - sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); + addReplyError(c,"wrong number of arguments for ZRANGEBYSCORE"); return; } From 49128f0b9da725de992e427fa341a837bcc2991b Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 2 Sep 2010 23:34:32 +0200 Subject: [PATCH 100/139] Fix bug in gluing a deferred multi bulk length to the next reply chunk --- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index d62456a3..55b7475b 100644 --- a/src/networking.c +++ b/src/networking.c @@ -253,9 +253,9 @@ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { if (ln->next != NULL) { next = listNodeValue(ln->next); - /* Only glue when the next node is an sds */ + /* Only 
glue when the next node is non-NULL (an sds in this case) */ if (next->ptr != NULL) { - len->ptr = sdscat(len->ptr,next->ptr); + len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr)); listDelNode(c->reply,ln->next); } } From b435f64510a032528c42fc1cfc4eca15a4474a1b Mon Sep 17 00:00:00 2001 From: Anko painting Date: Thu, 2 Sep 2010 21:13:27 -0700 Subject: [PATCH 101/139] fix for issue 237 --- src/vm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vm.c b/src/vm.c index 50fb326d..635016b8 100644 --- a/src/vm.c +++ b/src/vm.c @@ -110,6 +110,9 @@ void vmInit(void) { /* LZF requires a lot of stack */ pthread_attr_init(&server.io_threads_attr); pthread_attr_getstacksize(&server.io_threads_attr, &stacksize); + if(!stacksize) { + stacksize = 1; + } while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2; pthread_attr_setstacksize(&server.io_threads_attr, stacksize); /* Listen for events in the threaded I/O pipe */ From 556bdfbab9062d472e19d882ae045fece36a25ab Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 3 Sep 2010 10:24:18 +0200 Subject: [PATCH 102/139] added some comment and changed coding style for fix for 237 --- src/vm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/vm.c b/src/vm.c index 635016b8..ee831fb9 100644 --- a/src/vm.c +++ b/src/vm.c @@ -110,9 +110,11 @@ void vmInit(void) { /* LZF requires a lot of stack */ pthread_attr_init(&server.io_threads_attr); pthread_attr_getstacksize(&server.io_threads_attr, &stacksize); - if(!stacksize) { - stacksize = 1; - } + + /* Solaris may report a stacksize of 0, let's set it to 1 otherwise + * multiplying it by 2 in the while loop later will not really help ;) */ + if (!stacksize) stacksize = 1; + while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2; pthread_attr_setstacksize(&server.io_threads_attr, stacksize); /* Listen for events in the threaded I/O pipe */ From abe18d0e00f8ef15aac18ce59c17f90627b72e9e Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 6 Sep 2010 
10:12:44 +0200 Subject: [PATCH 103/139] Fix for solaris compilation bug Issue 325 --- doc/BlpopCommand.html | 6 +- doc/CommandReference.html | 26 ++++---- doc/DelCommand.html | 3 +- doc/ExpireCommand.html | 31 ++++++--- doc/FAQ.html | 2 +- doc/GenericCommandsSidebar.html | 2 +- doc/HashCommandsSidebar.html | 2 +- doc/KeysCommand.html | 3 +- doc/LindexCommand.html | 3 +- doc/MultiExecCommand.html | 105 +++++++++++++++++++++++++----- doc/README.html | 49 +++++++++++--- doc/SortedSetCommandsSidebar.html | 2 +- doc/SupportedLanguages.html | 5 +- doc/ZincrbyCommand.html | 4 +- doc/ZrangebyscoreCommand.html | 10 ++- doc/ZunionCommand.html | 7 +- doc/ZunionstoreCommand.html | 8 ++- doc/index.html | 6 +- src/solarisfixes.h | 1 + 19 files changed, 199 insertions(+), 76 deletions(-) diff --git a/doc/BlpopCommand.html b/doc/BlpopCommand.html index 9c64390a..7627ed71 100644 --- a/doc/BlpopCommand.html +++ b/doc/BlpopCommand.html @@ -16,7 +16,7 @@

BlpopCommand

@@ -35,8 +35,10 @@

Blocking behavior

If none of the specified keys exist or contain non empty lists, BLPOP blocks until some other client performs a LPUSH or an RPUSH operation against one of the lists.
Once new data is present on one of the lists, the client finally returns with the name of the key unblocking it and the popped value.
When blocking, if a non-zero timeout is specified, the client will unblock returning a nil special value if the specified amount of seconds passed without a push operation against at least one of the specified keys.
-
A timeout of zero means instead to block forever.
+
The timeout argument is interpreted as an integer value. A timeout of zero means instead to block forever.

Multiple clients blocking for the same keys

Multiple clients can block for the same key. They are put into a queue, so the first to be served will be the one that started to wait earlier, in a first-blpopping first-served fashion.
+

blocking POP inside a MULTI/EXEC transaction

BLPOP and BRPOP can be used with pipelining (sending multiple commands and reading the replies in batch), but it does not make sense to use BLPOP or BRPOP inside a MULTI/EXEC block (a Redis transaction).
+
The behavior of BLPOP inside MULTI/EXEC when the list is empty is to return a multi-bulk nil reply, exactly what happens when the timeout is reached. If you like science fiction, think of it as if time inside MULTI/EXEC flowed at infinite speed :)

Return value

BLPOP returns a two-elements array via a multi bulk reply in order to return both the unblocking key and the popped value.
When a non-zero timeout is specified, and the BLPOP operation timed out, the return value is a nil multi bulk reply. Most client values will return false or nil according to the programming language used.
Multi bulk reply diff --git a/doc/CommandReference.html b/doc/CommandReference.html index 647c1b0c..7021bd7e 100644 --- a/doc/CommandReference.html +++ b/doc/CommandReference.html @@ -16,7 +16,7 @@
- = Redis Command Reference =

Every command name links to a specific wiki page describing the behavior of the command.

Connection handling

  • QUIT close the connection
  • AUTH simple password authentication if enabled
-

Commands operating on all the kind of values

  • EXISTS key test if a key exists
  • DEL key delete a key
  • TYPE key return the type of the value stored at key
  • KEYS pattern return all the keys matching a given pattern
  • RANDOMKEY return a random key from the key space
  • RENAME oldname newname rename the old key in the new one, destroing the newname key if it already exists
  • RENAMENX oldname newname rename the old key in the new one, if the newname key does not already exist
  • DBSIZE return the number of keys in the current db
  • EXPIRE set a time to live in seconds on a key
  • TTL get the time to live in seconds of a key
  • SELECT index Select the DB having the specified index
  • MOVE key dbindex Move the key from the currently selected DB to the DB having as index dbindex
  • FLUSHDB Remove all the keys of the currently selected DB
  • FLUSHALL Remove all the keys from all the databases
-

Commands operating on string values

  • SET key value set a key to a string value
  • GET key return the string value of the key
  • GETSET key value set a key to a string returning the old value of the key
  • MGET key1 key2 ... keyN multi-get, return the strings values of the keys
  • SETNX key value set a key to a string value if the key does not exist
  • SETEX key time value Set+Expire combo command
  • MSET key1 value1 key2 value2 ... keyN valueN set a multiple keys to multiple values in a single atomic operation
  • MSETNX key1 value1 key2 value2 ... keyN valueN set a multiple keys to multiple values in a single atomic operation if none of the keys already exist
  • INCR key increment the integer value of key
  • INCRBY key integer increment the integer value of key by integer
  • DECR key decrement the integer value of key
  • DECRBY key integer decrement the integer value of key by integer
  • APPEND key value append the specified string to the string stored at key
  • SUBSTR key start end return a substring out of a larger string
-

Commands operating on lists

  • RPUSH key value Append an element to the tail of the List value at key
  • LPUSH key value Append an element to the head of the List value at key
  • LLEN key Return the length of the List value at key
  • LRANGE key start end Return a range of elements from the List at key
  • LTRIM key start end Trim the list at key to the specified range of elements
  • LINDEX key index Return the element at index position from the List at key
  • LSET key index value Set a new value as the element at index position of the List at key
  • LREM key count value Remove the first-N, last-N, or all the elements matching value from the List at key
  • LPOP key Return and remove (atomically) the first element of the List at key
  • RPOP key Return and remove (atomically) the last element of the List at key
  • BLPOP key1 key2 ... keyN timeout Blocking LPOP
  • BRPOP key1 key2 ... keyN timeout Blocking RPOP
  • RPOPLPUSH srckey dstkey Return and remove (atomically) the last element of the source List stored at _srckey_ and push the same element to the destination List stored at _dstkey_
-

Commands operating on sets

  • SADD key member Add the specified member to the Set value at key
  • SREM key member Remove the specified member from the Set value at key
  • SPOP key Remove and return (pop) a random element from the Set value at key
  • SMOVE srckey dstkey member Move the specified member from one Set to another atomically
  • SCARD key Return the number of elements (the cardinality) of the Set at key
  • SISMEMBER key member Test if the specified value is a member of the Set at key
  • SINTER key1 key2 ... keyN Return the intersection between the Sets stored at key1, key2, ..., keyN
  • SINTERSTORE dstkey key1 key2 ... keyN Compute the intersection between the Sets stored at key1, key2, ..., keyN, and store the resulting Set at dstkey
  • SUNION key1 key2 ... keyN Return the union between the Sets stored at key1, key2, ..., keyN
  • SUNIONSTORE dstkey key1 key2 ... keyN Compute the union between the Sets stored at key1, key2, ..., keyN, and store the resulting Set at dstkey
  • SDIFF key1 key2 ... keyN Return the difference between the Set stored at key1 and all the Sets key2, ..., keyN
  • SDIFFSTORE dstkey key1 key2 ... keyN Compute the difference between the Set key1 and all the Sets key2, ..., keyN, and store the resulting Set at dstkey
  • SMEMBERS key Return all the members of the Set value at key
  • SRANDMEMBER key Return a random member of the Set value at key
-

Commands operating on sorted sets (zsets, Redis version >

1.1) ==

  • ZADD key score member Add the specified member to the Sorted Set value at key or update the score if it already exist
  • ZREM key member Remove the specified member from the Sorted Set value at key
  • ZINCRBY key increment member If the member already exists increment its score by _increment_, otherwise add the member setting _increment_ as score
  • ZRANK key member Return the rank (or index) or _member_ in the sorted set at _key_, with scores being ordered from low to high
  • ZREVRANK key member Return the rank (or index) or _member_ in the sorted set at _key_, with scores being ordered from high to low
  • ZRANGE key start end Return a range of elements from the sorted set at key
  • ZREVRANGE key start end Return a range of elements from the sorted set at key, exactly like ZRANGE, but the sorted set is ordered in traversed in reverse order, from the greatest to the smallest score
  • ZRANGEBYSCORE key min max Return all the elements with score >= min and score <= max (a range query) from the sorted set
  • ZCARD key Return the cardinality (number of elements) of the sorted set at key
  • ZSCORE key element Return the score associated with the specified element of the sorted set at key
  • ZREMRANGEBYRANK key min max Remove all the elements with rank >= min and rank <= max from the sorted set
  • ZREMRANGEBYSCORE key min max Remove all the elements with score >= min and score <= max from the sorted set
  • ZUNIONSTORE / ZINTERSTORE dstkey N key1 ... keyN WEIGHTS w1 ... wN AGGREGATE SUM|MIN|MAX Perform a union or intersection over a number of sorted sets with optional weight and aggregate
-

Commands operating on hashes

  • HSET key field value Set the hash field to the specified value. Creates the hash if needed.
  • HGET key field Retrieve the value of the specified hash field.
  • HMSET key field1 value1 ... fieldN valueN Set the hash fields to their respective values.
  • HINCRBY key field integer Increment the integer value of the hash at _key_ on _field_ with _integer_.
  • HEXISTS key field Test for existence of a specified field in a hash
  • HDEL key field Remove the specified field from a hash
  • HLEN key Return the number of items in a hash.
  • HKEYS key Return all the fields in a hash.
  • HVALS key Return all the values in a hash.
  • HGETALL key Return all the fields and associated values in a hash.
-

Sorting

  • SORT key BY pattern LIMIT start end GET pattern ASC|DESC ALPHA Sort a Set or a List accordingly to the specified parameters
-

Transactions

-

Publish/Subscribe

-

Persistence control commands

  • SAVE Synchronously save the DB on disk
  • BGSAVE Asynchronously save the DB on disk
  • LASTSAVE Return the UNIX time stamp of the last successfully saving of the dataset on disk
  • SHUTDOWN Synchronously save the DB on disk, then shutdown the server
  • BGREWRITEAOF Rewrite the append only file in background when it gets too big
-

Remote server control commands

  • INFO Provide information and statistics about the server
  • MONITOR Dump all the received requests in real time
  • SLAVEOF Change the replication settings
  • CONFIG Configure a Redis server at runtime
+ = Redis Command Reference =

Every command name links to a specific wiki page describing the behavior of the command.

Categorized Command List

Connection handling

Command Parameters Description
QUIT - close the connection
AUTH password simple password authentication if enabled
+

Commands operating on all value types

Command Parameters Description
EXISTS key test if a key exists
DEL key delete a key
TYPE key return the type of the value stored at key
KEYS pattern return all the keys matching a given pattern
RANDOMKEY - return a random key from the key space
RENAME oldname newname rename the oldname key to newname, destroying the newname key if it already exists
RENAMENX oldname newname rename the oldname key to newname, if the newname key does not already exist
DBSIZE - return the number of keys in the current db
EXPIRE key seconds set a time to live in seconds on a key
PERSIST key remove the expire from a key
TTL key get the time to live in seconds of a key
SELECT index Select the DB with the specified index
MOVE key dbindex Move the key from the currently selected DB to the dbindex DB
FLUSHDB - Remove all the keys from the currently selected DB
FLUSHALL - Remove all the keys from all the databases
+

Commands operating on string values

Command Parameters Description
SET key value Set a key to a string value
GET key Return the string value of the key
GETSET key value Set a key to a string returning the old value of the key
MGET key1 key2 ... keyN Multi-get, return the string values of the keys
SETNX key value Set a key to a string value if the key does not exist
SETEX key time value Set+Expire combo command
MSET key1 value1 key2 value2 ... keyN valueN Set multiple keys to multiple values in a single atomic operation
MSETNX key1 value1 key2 value2 ... keyN valueN Set multiple keys to multiple values in a single atomic operation if none of the keys already exist
INCR key Increment the integer value of key
INCRBY key integer Increment the integer value of key by integer
DECR key Decrement the integer value of key
DECRBY key integer Decrement the integer value of key by integer
APPEND key value Append the specified string to the string stored at key
SUBSTR key start end Return a substring of a larger string
+

Commands operating on lists

Command Parameters Description
RPUSH key value Append an element to the tail of the List value at key
LPUSH key value Append an element to the head of the List value at key
LLEN key Return the length of the List value at key
LRANGE key start end Return a range of elements from the List at key
LTRIM key start end Trim the list at key to the specified range of elements
LINDEX key index Return the element at index position from the List at key
LSET key index value Set a new value as the element at index position of the List at key
LREM key count value Remove the first-N, last-N, or all the elements matching value from the List at key
LPOP key Return and remove (atomically) the first element of the List at key
RPOP key Return and remove (atomically) the last element of the List at key
BLPOP key1 key2 ... keyN timeout Blocking LPOP
BRPOP key1 key2 ... keyN timeout Blocking RPOP
RPOPLPUSH srckey dstkey Return and remove (atomically) the last element of the source List stored at srckey and push the same element to the destination List stored at dstkey
+

Commands operating on sets

Command Parameters Description
SADD key member Add the specified member to the Set value at key
SREM key member Remove the specified member from the Set value at key
SPOP key Remove and return (pop) a random element from the Set value at key
SMOVE srckey dstkey member Move the specified member from one Set to another atomically
SCARD key Return the number of elements (the cardinality) of the Set at key
SISMEMBER key member Test if the specified value is a member of the Set at key
SINTER key1 key2 ... keyN Return the intersection between the Sets stored at key1, key2, ..., keyN
SINTERSTORE dstkey key1 key2 ... keyN Compute the intersection between the Sets stored at key1, key2, ..., keyN, and store the resulting Set at dstkey
SUNION key1 key2 ... keyN Return the union between the Sets stored at key1, key2, ..., keyN
SUNIONSTORE dstkey key1 key2 ... keyN Compute the union between the Sets stored at key1, key2, ..., keyN, and store the resulting Set at dstkey
SDIFF key1 key2 ... keyN Return the difference between the Set stored at key1 and all the Sets key2, ..., keyN
SDIFFSTORE dstkey key1 key2 ... keyN Compute the difference between the Set key1 and all the Sets key2, ..., keyN, and store the resulting Set at dstkey
SMEMBERS key Return all the members of the Set value at key
SRANDMEMBER key Return a random member of the Set value at key
+

Commands operating on sorted zsets (sorted sets)

Command Parameters Description
ZADD key score member Add the specified member to the Sorted Set value at key or update the score if it already exists
ZREM key member Remove the specified member from the Sorted Set value at key
ZINCRBY key increment member If the member already exists increment its score by increment, otherwise add the member setting increment as score
ZRANK key member Return the rank (or index) of member in the sorted set at key, with scores being ordered from low to high
ZREVRANK key member Return the rank (or index) of member in the sorted set at key, with scores being ordered from high to low
ZRANGE key start end Return a range of elements from the sorted set at key
ZREVRANGE key start end Return a range of elements from the sorted set at key, exactly like ZRANGE, but the sorted set is traversed in reverse order, from the greatest to the smallest score
ZRANGEBYSCORE key min max Return all the elements with score >= min and score <= max (a range query) from the sorted set
ZCOUNT key min max Return the number of elements with score >= min and score <= max in the sorted set
ZCARD key Return the cardinality (number of elements) of the sorted set at key
ZSCORE key element Return the score associated with the specified element of the sorted set at key
ZREMRANGEBYRANK key min max Remove all the elements with rank >= min and rank <= max from the sorted set
ZREMRANGEBYSCORE key min max Remove all the elements with score >= min and score <= max from the sorted set
ZUNIONSTORE / ZINTERSTORE dstkey N key1 ... keyN WEIGHTS w1 ... wN AGGREGATE SUM|MIN|MAX Perform a union or intersection over a number of sorted sets with optional weight and aggregate
+

Commands operating on hashes

Command Parameters Description
HSET key field value Set the hash field to the specified value. Creates the hash if needed.
HGET key field Retrieve the value of the specified hash field.
HMGET key field1 ... fieldN Get the hash values associated to the specified fields.
HMSET key field1 value1 ... fieldN valueN Set the hash fields to their respective values.
HINCRBY key field integer Increment the integer value of the hash at key on field with integer.
HEXISTS key field Test for existence of a specified field in a hash
HDEL key field Remove the specified field from a hash
HLEN key Return the number of items in a hash.
HKEYS key Return all the fields in a hash.
HVALS key Return all the values in a hash.
HGETALL key Return all the fields and associated values in a hash.
+

Sorting

Command Parameters Description
SORT key BY pattern LIMIT start end GET pattern ASC|DESC ALPHA Sort a Set or a List accordingly to the specified parameters
+

Transactions

Command Parameters Description
MULTI/EXEC/DISCARD/WATCH/UNWATCH - Redis atomic transactions
+

Publish/Subscribe

Command Parameters Description
SUBSCRIBE/UNSUBSCRIBE/PUBLISH - Redis Publish/Subscribe messaging paradigm implementation
+

Persistence control commands

Command Parameters Description
SAVE - Synchronously save the DB on disk
BGSAVE - Asynchronously save the DB on disk
LASTSAVE - Return the UNIX time stamp of the last successful save of the dataset on disk
SHUTDOWN - Synchronously save the DB on disk, then shutdown the server
BGREWRITEAOF - Rewrite the append only file in background when it gets too big
+

Remote server control commands

Command Parameters Description
INFO - Provide information and statistics about the server
MONITOR - Dump all the received requests in real time
SLAVEOF - Change the replication settings
CONFIG - Configure a Redis server at runtime
diff --git a/doc/DelCommand.html b/doc/DelCommand.html index 8d063ce7..3a7ac69c 100644 --- a/doc/DelCommand.html +++ b/doc/DelCommand.html @@ -27,12 +27,11 @@
#sidebar GenericCommandsSidebar

DEL _key1_ _key2_ ... _keyN_

-Time complexity: O(1)
Remove the specified keys. If a given key does not existno operation is performed for this key. The commnad returns the number ofkeys removed.
+Time complexity: O(1)
Remove the specified keys. If a given key does not exist no operation is performed for this key. The command returns the number of keys removed.

Return value

Integer reply, specifically:

 an integer greater than 0 if one or more keys were removed
 0 if none of the specified keys existed
 
-
diff --git a/doc/ExpireCommand.html b/doc/ExpireCommand.html index a3dbbe5b..cebac8b8 100644 --- a/doc/ExpireCommand.html +++ b/doc/ExpireCommand.html @@ -16,7 +16,7 @@

ExpireCommand

@@ -28,12 +28,15 @@
#sidebar GenericCommandsSidebar

EXPIRE _key_ _seconds_

EXPIREAT _key_ _unixtime_ (Redis >

1.1)= +

PERSIST _key_

Time complexity: O(1)
Set a timeout on the specified key. After the timeout the key will be automatically deleted by the server. A key with an associated timeout is said to be volatile in Redis terminology.
-
Voltile keys are stored on disk like the other keys, the timeout is persistenttoo like all the other aspects of the dataset. Saving a dataset containingthe dataset and stopping the server does not stop the flow of time as Redisregisters on disk when the key will no longer be available as Unix time, andnot the remaining seconds.
+
Volatile keys are stored on disk like the other keys, the timeout is persistent too like all the other aspects of the dataset. Saving a dataset containing expires and stopping the server does not stop the flow of time as Redis stores on disk the time when the key will no longer be available as Unix time, and not the remaining seconds.
EXPIREAT works exactly like EXPIRE but instead of taking the number of seconds representing the Time To Live of the key as a second argument (that is a relative way of specifying the TTL), it takes an absolute one in the form of a UNIX timestamp (number of seconds elapsed since 1 Jan 1970).
-
EXPIREAT was introduced in order to implement [Persistence append only saving mode] so that EXPIRE commands are automatically translated into EXPIREAT commands for the append only file. Of course EXPIREAT can alsoused by programmers that need a way to simply specify that a given key should expire at a given time in the future.
-

How the expire is removed from a key

When the key is set to a new value using the SET command, the INCR commandor any other command that modify the value stored at key the timeout isremoved from the key and the key becomes non volatile.
-

Restrictions with write operations against volatile keys

Write operations like LPUSH, LSET and every other command that has theeffect of modifying the value stored at a volatile key have a special semantic:basically a volatile key is destroyed when it is target of a write operation.See for example the following usage pattern:
+
EXPIREAT was introduced in order to implement the Append Only File persistence mode so that EXPIRE commands are automatically translated into EXPIREAT commands for the append only file. Of course EXPIREAT can also be used by programmers that need a way to simply specify that a given key should expire at a given time in the future.
+
Since Redis 2.1.3 you can update the value of the timeout of a key already having an expire set. It is also possible to undo the expire entirely, turning the key into a normal key using the PERSIST command.
+

How the expire is removed from a key

When the key is set to a new value using the SET command, or when a key is destroyed via DEL, the timeout is removed from the key.
+

Restrictions with write operations against volatile keys

IMPORTANT: Since Redis 2.1.3 or greater, there are no restrictions about the operations you can perform against volatile keys, however older versions of Redis, including the current stable version 2.0.0, have the following limitations:
+
Write operations like LPUSH, LSET and every other command that has theeffect of modifying the value stored at a volatile key have a special semantic:basically a volatile key is destroyed when it is target of a write operation.See for example the following usage pattern:
 % ./redis-cli lpush mylist foobar /Users/antirez/hack/redis
 OK
@@ -45,8 +48,13 @@ OK
 OK
 % ./redis-cli lrange mylist 0 -1  /Users/antirez/hack/redis
 1. newelement
-
What happened here is that lpush against the key with a timeout set deletedthe key before to perform the operation. There is so a simple rule, writeoperations against volatile keys will destroy the key before to perform theoperation. Why Redis uses this behavior? In order to retain an importantproperty: a server that receives a given number of commands in the samesequence will end with the same dataset in memory. Without the delete-on-writesemantic what happens is that the state of the server depends on the timeof the commands to. This is not a desirable property in a distributed databasethat supports replication.
-

Setting the timeout again on already volatile keys

Trying to call EXPIRE against a key that already has an associated timeoutwill not change the timeout of the key, but will just return 0. If insteadthe key does not have a timeout associated the timeout will be set and EXPIREwill return 1.
+
What happened here is that LPUSH against the key with a timeout set deletedthe key before to perform the operation. There is so a simple rule, writeoperations against volatile keys will destroy the key before to perform theoperation. Why Redis uses this behavior? In order to retain an importantproperty: a server that receives a given number of commands in the samesequence will end with the same dataset in memory. Without the delete-on-writesemantic what happens is that the state of the server depends on the timethe commands were issued. This is not a desirable property in a distributed databasethat supports replication.
+

Restrictions for write operations with volatile keys as sources

Even when the volatile key is not modified as part of a write operation, if it is +read in a composite write operation (such as SINTERSTORE) it will be cleared at the +start of the operation. This is done to avoid concurrency issues in replication. +Imagine a key that is about to expire and the composite operation is run against it. +On a slave node, this key might already be expired, which leaves you with a +desync in your dataset.

Setting the timeout again on already volatile keys

Trying to call EXPIRE against a key that already has an associated timeoutwill not change the timeout of the key, but will just return 0. If insteadthe key does not have a timeout associated the timeout will be set and EXPIREwill return 1.

Enhanced Lazy Expiration algorithm

Redis does not constantly monitor keys that are going to be expired. Keys are expired simply when some client tries to access a key, and the key is found to be timed out.
Of course this is not enough as there are expired keys that will never be accessed again. These keys should be expired anyway, so once every second Redis tests a few keys at random among keys with an expire set. All the keys that are already expired are deleted from the keyspace.

Version 1.0

Each time a fixed number of keys were tested (100 by default). So if you had a client setting keys with a very short expire faster than 100 per second the memory continued to grow. When you stopped inserting new keys the memory started to be freed, 100 keys every second in the best conditions. Under a peak Redis continues to use more and more RAM even if most keys are expired in each sweep.
@@ -56,8 +64,10 @@ OK
This means that at any given moment the maximum amount of keys alreadyexpired that are using memory is at max equal to max setting operations per second divided by 4.

Return value

Integer reply, specifically:

 1: the timeout was set.
-0: the timeout was not set since the key already has an associated timeout, or the key does not exist.
-

FAQ: Can you explain better why Redis deletes keys with an EXPIRE on write operations?

+0: the timeout was not set since the key already has an associated timeout + (this may happen only in Redis versions < 2.1.3, Redis >= 2.1.3 will + happily update the timeout), or the key does not exist. +

FAQ: Can you explain better why Redis < 2.1.3 deletes keys with an EXPIRE on write operations?

Ok let's start with the problem:
 redis> set a 100
@@ -76,7 +86,8 @@ EXPIRE a 5
 INCR a
 
Imagine a Redis version that does not implement the "Delete keys with an expire set on write operation" semantic. -Running the above example with the 10 seconds pause will lead to 'a' being set to the value of 1, as it no longer exists when INCR is called 10 seconds later.

Instead if we drop the 10 seconds pause, the result is that 'a' is set to 101.

And in the practice timing changes! For instance the client may wait 10 seconds before INCR, but the sequence written in the Append Only File (and later replayed-back as fast as possible when Redis is restarted) will not have the pause. Even if we add a timestamp in the AOF, when the time difference is smaller than our timer resolution, we have a race condition.

The same happens with master-slave replication. Again, consider the example above: the client will use the same sequence of commands without the 10 seconds pause, but the replication link will slow down for a few seconds due to a network problem. Result? The master will contain 'a' set to 101, the slave 'a' set to 1.

The only way to avoid this but at the same time have reliable non time dependent timeouts on keys is to destroy volatile keys when a write operation is attempted against it.

After all Redis is one of the rare fully persistent databases that will give you EXPIRE. This comes to a cost :) +Running the above example with the 10 seconds pause will lead to 'a' being set to the value of 1, as it no longer exists when INCR is called 10 seconds later.

Instead if we drop the 10 seconds pause, the result is that 'a' is set to 101.

And in the practice timing changes! For instance the client may wait 10 seconds before INCR, but the sequence written in the Append Only File (and later replayed-back as fast as possible when Redis is restarted) will not have the pause. Even if we add a timestamp in the AOF, when the time difference is smaller than our timer resolution, we have a race condition.

The same happens with master-slave replication. Again, consider the example above: the client will use the same sequence of commands without the 10 seconds pause, but the replication link will slow down for a few seconds due to a network problem. Result? The master will contain 'a' set to 101, the slave 'a' set to 1.

The only way to avoid this but at the same time have reliable non time dependent timeouts on keys is to destroy volatile keys when a write operation is attempted against it.

After all Redis is one of the rare fully persistent databases that will give you EXPIRE. This comes to a cost :)

FAQ: How were these limitations solved in Redis versions >= 2.1.3?

Since Redis 2.1.3 there are no longer restrictions in the use you can do of write commands against volatile keys, still the replication and AOF file are guaranteed to be fully consistent.

In order to obtain a correct behavior without sacrificing consistency now when a key expires, a DEL operation is synthesized in both the AOF file and against all the attached slaves. This way the expiration process is centralized in the master instance, and there is no longer a chance of consistency errors.

However while the slaves while connected to a master will not expire keys independently, they'll still take the full state of the expires existing in the dataset, so when a slave is elected to a master it will be able to expire the keys independently, fully acting as a master. +
diff --git a/doc/FAQ.html b/doc/FAQ.html index 7c012b2c..531fb708 100644 --- a/doc/FAQ.html +++ b/doc/FAQ.html @@ -58,7 +58,7 @@ Redis for the same objects. This happens because when data is in memory is full of pointers, reference counters and other metadata. Add to this malloc fragmentation and need to return word-aligned chunks of memory and you have a clear picture of what happens. So this means to -have 10 times the I/O between memory and disk than otherwise needed.

Is there something I can do to lower the Redis memory usage?

Yes, try to compile it with 32 bit target if you are using a 64 bit box.

If you are using Redis >= 1.3, try using the Hash data type, it can save a lot of memory.

If you are using hashes or any other type with values bigger than 128 bytes try also this to lower the RSS usage (Resident Set Size): EXPORT MMAP_THRESHOLD=4096

I have an empty Redis server but INFO and logs are reporting megabytes of memory in use!

This may happen and it's prefectly ok. Redis objects are small C structures allocated and freed a lot of times. This costs a lot of CPU so instead of being freed, released objects are taken into a free list and reused when needed. This memory is taken exactly by this free objects ready to be reused.

What happens if Redis runs out of memory?

With modern operating systems malloc() returning NULL is not common, usually the server will start swapping and Redis performances will be disastrous so you'll know it's time to use more Redis servers or get more RAM.

The INFO command (work in progress in this days) will report the amount of memory Redis is using so you can write scripts that monitor your Redis servers checking for critical conditions.

You can also use the "maxmemory" option in the config file to put a limit to the memory Redis can use. If this limit is reached Redis will start to reply with an error to write commands (but will continue to accept read-only commands).

Does Redis use more memory running in 64 bit boxes? Can I use 32 bit Redis in 64 bit systems?

Redis uses a lot more memory when compiled for 64 bit target, especially if the dataset is composed of many small keys and values. Such a database will, for instance, consume 50 MB of RAM when compiled for the 32 bit target, and 80 MB for 64 bit! That's a big difference.

You can run 32 bit Redis binaries in a 64 bit Linux and Mac OS X system without problems. For OS X just use make 32bit. For Linux instead, make sure you have libc6-dev-i386 installed, then use make 32bit if you are using the latest Git version. Instead for Redis <= 1.2.2 you have to edit the Makefile and replace "-arch i386" with "-m32".

If your application is already able to perform application-level sharding, it is very advisable to run N instances of Redis 32bit against a big 64 bit Redis box (with more than 4GB of RAM) instead than a single 64 bit instance, as this is much more memory efficient.

How much time it takes to load a big database at server startup?

Just an example on normal hardware: It takes about 45 seconds to restore a 2 GB database on a fairly standard system, no RAID. This can give you some kind of feeling about the order of magnitude of the time needed to load data when you restart the server.

Background saving is failing with a fork() error under Linux even if I've a lot of free RAM!

Short answer: echo 1 > /proc/sys/vm/overcommit_memory :)

And now the long one:

Redis background saving schema relies on the copy-on-write semantic of fork in modern operating systems: Redis forks (creates a child process) that is an exact copy of the parent. The child process dumps the DB on disk and finally exits. In theory the child should use as much memory as the parent being a copy, but actually thanks to the copy-on-write semantic implemented by most modern operating systems the parent and child process will share the common memory pages. A page will be duplicated only when it changes in the child or in the parent. Since in theory all the pages may change while the child process is saving, Linux can't tell in advance how much memory the child will take, so if the overcommit_memory setting is set to zero fork will fail unless there is as much free RAM as required to really duplicate all the parent memory pages, with the result that if you have a Redis dataset of 3 GB and just 2 GB of free memory it will fail.

Setting overcommit_memory to 1 says Linux to relax and perform the fork in a more optimistic allocation fashion, and this is indeed what you want for Redis.

Are Redis on disk snapshots atomic?

Yes, redis background saving process is always fork(2)ed when the server is outside of the execution of a command, so every command reported to be atomic in RAM is also atomic from the point of view of the disk snapshot.

Redis is single threaded, how can I exploit multiple CPU / cores?

Simply start multiple instances of Redis in different ports in the same box and threat them as different servers! Given that Redis is a distributed database anyway in order to scale you need to think in terms of multiple computational units. At some point a single box may not be enough anyway.

In general key-value databases are very scalable because of the property that different keys can stay on different servers independently.

In Redis there are client libraries such Redis-rb (the Ruby client) that are able to handle multiple servers automatically using consistent hashing. We are going to implement consistent hashing in all the other major client libraries. If you use a different language you can implement it yourself otherwise just hash the key before to SET / GET it from a given server. For example imagine to have N Redis servers, server-0, server-1, ..., server-N. You want to store the key "foo", what's the right server where to put "foo" in order to distribute keys evenly among different servers? Just perform the crc = CRC32("foo"), then servernum = crc % N (the rest of the division for N). This will give a number between 0 and N-1 for every key. Connect to this server and store the key. The same for gets.

This is a basic way of performing key partitioning, consistent hashing is much better and this is why after Redis 1.0 will be released we'll try to implement this in every widely used client library starting from Python and PHP (Ruby already implements this support).

I'm using some form of key hashing for partitioning, but what about SORT BY?

With SORT BY you need that all the weight keys are in the same Redis instance of the list/set you are trying to sort. In order to make this possible we developed a concept called key tags. A key tag is a special pattern inside a key that, if preset, is the only part of the key hashed in order to select the server for this key. For example in order to hash the key "foo" I simply perform the CRC32 checksum of the whole string, but if this key has a pattern in the form of the characters {...} I only hash this substring. So for example for the key "foo{bared}" the key hashing code will simply perform the CRC32 of "bared". This way using key tags you can ensure that related keys will be stored on the same Redis instance just using the same key tag for all this keys. Redis-rb already implements key tags.

What is the maximum number of keys a single Redis instance can hold? and what the max number of elements in a List, Set, Ordered Set?

In theory Redis can handle up to 232 keys, and was tested in practice to handle at least 150 million of keys per instance. We are working in order to experiment with larger values.

Every list, set, and ordered set, can hold 2
32 elements.

Actually Redis internals are ready to allow up to 264 elements but the current disk dump format don't support this, and there is a lot time to fix this issues in the future as currently even with 128 GB of RAM it's impossible to reach 232 elements.

What Redis means actually?

Redis means two things: +have 10 times the I/O between memory and disk than otherwise needed.

Is there something I can do to lower the Redis memory usage?

Yes, try to compile it with 32 bit target if you are using a 64 bit box.

If you are using Redis >= 1.3, try using the Hash data type, it can save a lot of memory.

If you are using hashes or any other type with values bigger than 128 bytes try also this to lower the RSS usage (Resident Set Size): EXPORT MMAP_THRESHOLD=4096

I have an empty Redis server but INFO and logs are reporting megabytes of memory in use!

This may happen and it's perfectly ok. Redis objects are small C structures allocated and freed a lot of times. This costs a lot of CPU so instead of being freed, released objects are taken into a free list and reused when needed. This memory is taken exactly by these free objects ready to be reused.

What happens if Redis runs out of memory?

With modern operating systems malloc() returning NULL is not common, usually the server will start swapping and Redis performances will be disastrous so you'll know it's time to use more Redis servers or get more RAM.

The INFO command (work in progress in this days) will report the amount of memory Redis is using so you can write scripts that monitor your Redis servers checking for critical conditions.

You can also use the "maxmemory" option in the config file to put a limit to the memory Redis can use. If this limit is reached Redis will start to reply with an error to write commands (but will continue to accept read-only commands).

Does Redis use more memory running in 64 bit boxes? Can I use 32 bit Redis in 64 bit systems?

Redis uses a lot more memory when compiled for 64 bit target, especially if the dataset is composed of many small keys and values. Such a database will, for instance, consume 50 MB of RAM when compiled for the 32 bit target, and 80 MB for 64 bit! That's a big difference.

You can run 32 bit Redis binaries in a 64 bit Linux and Mac OS X system without problems. For OS X just use make 32bit. For Linux instead, make sure you have libc6-dev-i386 installed, then use make 32bit if you are using the latest Git version. Instead for Redis <= 1.2.2 you have to edit the Makefile and replace "-arch i386" with "-m32".

If your application is already able to perform application-level sharding, it is very advisable to run N instances of Redis 32bit against a big 64 bit Redis box (with more than 4GB of RAM) instead than a single 64 bit instance, as this is much more memory efficient.

How much time it takes to load a big database at server startup?

Just an example on normal hardware: It takes about 45 seconds to restore a 2 GB database on a fairly standard system, no RAID. This can give you some kind of feeling about the order of magnitude of the time needed to load data when you restart the server.

Background saving is failing with a fork() error under Linux even if I've a lot of free RAM!

Short answer: echo 1 > /proc/sys/vm/overcommit_memory :)

And now the long one:

Redis background saving schema relies on the copy-on-write semantic of fork in modern operating systems: Redis forks (creates a child process) that is an exact copy of the parent. The child process dumps the DB on disk and finally exits. In theory the child should use as much memory as the parent being a copy, but actually thanks to the copy-on-write semantic implemented by most modern operating systems the parent and child process will share the common memory pages. A page will be duplicated only when it changes in the child or in the parent. Since in theory all the pages may change while the child process is saving, Linux can't tell in advance how much memory the child will take, so if the overcommit_memory setting is set to zero fork will fail unless there is as much free RAM as required to really duplicate all the parent memory pages, with the result that if you have a Redis dataset of 3 GB and just 2 GB of free memory it will fail.

Setting overcommit_memory to 1 says Linux to relax and perform the fork in a more optimistic allocation fashion, and this is indeed what you want for Redis.

A good source to understand how Linux Virtual Memory works and other alternatives for overcommit_memory and overcommit_ratio is this classic from Red Hat Magazine, "Understanding Virtual Memory": http://www.redhat.com/magazine/001nov04/features/vm/

Are Redis on disk snapshots atomic?

Yes, redis background saving process is always fork(2)ed when the server is outside of the execution of a command, so every command reported to be atomic in RAM is also atomic from the point of view of the disk snapshot.

Redis is single threaded, how can I exploit multiple CPU / cores?

Simply start multiple instances of Redis in different ports in the same box and treat them as different servers! Given that Redis is a distributed database anyway in order to scale you need to think in terms of multiple computational units. At some point a single box may not be enough anyway.

In general key-value databases are very scalable because of the property that different keys can stay on different servers independently.

In Redis there are client libraries such Redis-rb (the Ruby client) that are able to handle multiple servers automatically using consistent hashing. We are going to implement consistent hashing in all the other major client libraries. If you use a different language you can implement it yourself otherwise just hash the key before to SET / GET it from a given server. For example imagine to have N Redis servers, server-0, server-1, ..., server-N. You want to store the key "foo", what's the right server where to put "foo" in order to distribute keys evenly among different servers? Just perform the crc = CRC32("foo"), then servernum = crc % N (the rest of the division for N). This will give a number between 0 and N-1 for every key. Connect to this server and store the key. The same for gets.

This is a basic way of performing key partitioning, consistent hashing is much better and this is why after Redis 1.0 will be released we'll try to implement this in every widely used client library starting from Python and PHP (Ruby already implements this support).

I'm using some form of key hashing for partitioning, but what about SORT BY?

With SORT BY you need that all the weight keys are in the same Redis instance of the list/set you are trying to sort. In order to make this possible we developed a concept called key tags. A key tag is a special pattern inside a key that, if present, is the only part of the key hashed in order to select the server for this key. For example in order to hash the key "foo" I simply perform the CRC32 checksum of the whole string, but if this key has a pattern in the form of the characters {...} I only hash this substring. So for example for the key "foo{bared}" the key hashing code will simply perform the CRC32 of "bared". This way using key tags you can ensure that related keys will be stored on the same Redis instance just using the same key tag for all these keys. Redis-rb already implements key tags.

What is the maximum number of keys a single Redis instance can hold? and what the max number of elements in a List, Set, Ordered Set?

In theory Redis can handle up to 2^32 keys, and was tested in practice to handle at least 150 million keys per instance. We are working in order to experiment with larger values.

Every list, set, and ordered set can hold 2^32 elements.

Actually Redis internals are ready to allow up to 2^64 elements, but the current disk dump format doesn't support this, and there is a lot of time to fix this issue in the future, as currently even with 128 GB of RAM it's impossible to reach 2^32 elements.

What does Redis actually mean?

Redis means two things:
  • it's a joke on the word Redistribute (instead of using just a Relational DB, redistribute your workload among Redis servers)
  • it means REmote DIctionary Server

Why did you start the Redis project?

In order to scale LLOOGG. But after I got the basic server working I liked the idea to share the work with other guys, and Redis was turned into an open source project. diff --git a/doc/GenericCommandsSidebar.html b/doc/GenericCommandsSidebar.html index d2dd6aa7..0d25cb22 100644 --- a/doc/GenericCommandsSidebar.html +++ b/doc/GenericCommandsSidebar.html @@ -26,7 +26,7 @@ diff --git a/doc/HashCommandsSidebar.html b/doc/HashCommandsSidebar.html index f4808af2..c0b84670 100644 --- a/doc/HashCommandsSidebar.html +++ b/doc/HashCommandsSidebar.html @@ -26,7 +26,7 @@ diff --git a/doc/KeysCommand.html b/doc/KeysCommand.html index f1a6e070..6d79428a 100644 --- a/doc/KeysCommand.html +++ b/doc/KeysCommand.html @@ -32,7 +32,8 @@
the slow commands that may ruin the DB performance if not used with care*.
In other words this command is intended only for debugging and *special* operations like creating a script to change the DB schema. Don't use it in your normal code. Use Redis Sets in order to group together a subset of objects.
Glob style patterns examples: -
* h?llo will match hello hallo hhllo* h*llo will match hllo heeeello* h[ae]llo will match hello and hallo, but not hillo
Use \ to escape special chars if you want to match them verbatim.

Return value

Bulk reply, specifically a string in the form of space separated list of keys. Note that most client libraries will return an Array of keys and not a single string with space separated keys (that is, split by " " is performed in the client library usually). +
* h?llo will match hello hallo hhllo* h*llo will match hllo heeeello* h[ae]llo will match hello and hallo, but not hillo
Use \ to escape special chars if you want to match them verbatim.

Return value

+Multi bulk reply diff --git a/doc/LindexCommand.html b/doc/LindexCommand.html index 4af80530..0e36634b 100644 --- a/doc/LindexCommand.html +++ b/doc/LindexCommand.html @@ -28,10 +28,9 @@
#sidebar ListCommandsSidebar

LINDEX _key_ _index_

Time complexity: O(n) (with n being the length of the list)
Return the specified element of the list stored at the specified key. 0 is the first element, 1 the second and so on. Negative indexes are supported, for example -1 is the last element, -2 the penultimate and so on.
-
If the value stored at key is not of list type an error is returned.If the index is out of range an empty string is returned.
+
If the value stored at key is not of list type an error is returned. If the index is out of range a 'nil' reply is returned.
Note that even if the average time complexity is O(n), asking for the first or the last element of the list is O(1).

Return value

Bulk reply, specifically the requested element. -
diff --git a/doc/MultiExecCommand.html b/doc/MultiExecCommand.html index e0a41983..65ec67a0 100644 --- a/doc/MultiExecCommand.html +++ b/doc/MultiExecCommand.html @@ -16,7 +16,7 @@

MultiExecCommand

@@ -26,15 +26,21 @@
- #sidebar GenericCommandsSidebar

MULTI

+ #sidebar GenericCommandsSidebar

WATCH key1 key2 ... keyN (Redis >

2.1.0)= +

UNWATCH

+

MULTI

COMMAND_1 ...

COMMAND_2 ...

COMMAND_N ...

-

EXEC or DISCARD

MULTI, EXEC and DISCARD commands are the fundation of Redis Transactions.A Redis Transaction allows to execute a group of Redis commands in a singlestep, with two important guarantees:
-
  • All the commands in a transaction are serialized and executed sequentially. It can never happen that a request issued by another client is served in the middle of the execution of a Redis transaction. This guarantees that the commands are executed as a single atomic operation.
  • Either all of the commands or none are processed. The EXEC command triggers the execution of all the commands in the transaction, so if a client loses the connection to the server in the context of a transaction before calling the MULTI command none of the operations are performed, instead if the EXEC command is called, all the operations are performed. An exception to this rule is when the Append Only File is enabled: every command that is part of a Redis transaction will log in the AOF as long as the operation is completed, so if the Redis server crashes or is killed by the system administrator in some hard way it is possible that only a partial number of operations are registered.
-

Usage

A Redis transaction is entered using the MULTI command. The command alwaysreplies with OK. At this point the user can issue multiple commands. Insteadto execute this commands Redis will "queue" them. All the commands areexecuted once EXEC is called.
-
Calling DISCARD instead will flush the transaction queue and will exitthe transaction.
-
The following is an example using the Ruby client:
+

EXEC or DISCARD

MULTI, EXEC, DISCARD and WATCH commands are the foundation of Redis Transactions. +A Redis Transaction allows the execution of a group of Redis commands in a single +step, with two important guarantees:

  • All the commands in a transaction are serialized and executed sequentially. It can never happen that a request issued by another client is served in the middle of the execution of a Redis transaction. This guarantees that the commands are executed as a single atomic operation.
  • Either all of the commands or none are processed. The EXEC command triggers the execution of all the commands in the transaction, so if a client loses the connection to the server in the context of a transaction before calling the EXEC command none of the operations are performed, instead if the EXEC command is called, all the operations are performed. An exception to this rule is when the Append Only File is enabled: every command that is part of a Redis transaction will log in the AOF as long as the operation is completed, so if the Redis server crashes or is killed by the system administrator in some hard way it is possible that only a partial number of operations are registered.
+Since Redis 2.1.0, it's also possible to add a further guarantee to the above two, in the form of optimistic locking of a set of keys in a way very similar to a CAS (check and set) operation. This is documented later in this manual page.

Usage

A Redis transaction is entered using the MULTI command. The command always +replies with OK. At this point the user can issue multiple commands. Instead +of executing these commands Redis will "queue" them. All the commands are +executed once EXEC is called.

Calling DISCARD instead will flush the transaction queue and will exit +the transaction.

The following is an example using the Ruby client: +
 ?> r.multi
 => "OK"
 >> r.incr "foo"
@@ -46,9 +52,14 @@
 >> r.exec
 => [1, 1, 2]
 
-
As it is possible to see from the session above, MULTI returns an "array" ofreplies, where every element is the reply of a single command in thetransaction, in the same order the commands were queued.
-
When a Redis connection is in the context of a MULTI request, all the commandswill reply with a simple string "QUEUED" if they are correct from thepoint of view of the syntax and arity (number of arguments) of the commaand.Some command is still allowed to fail during execution time.
-
This is more clear if at protocol level: in the following example one commandwill fail when executed even if the syntax is right:
+As it is possible to see from the session above, MULTI returns an "array" of
+replies, where every element is the reply of a single command in the
+transaction, in the same order the commands were queued.

When a Redis connection is in the context of a MULTI request, all the commands +will reply with a simple string "QUEUED" if they are correct from the +point of view of the syntax and arity (number of arguments) of the command. +Some commands are still allowed to fail during execution time.

This is more clear if at protocol level: in the following example one command +will fail when executed even if the syntax is right: +
 Trying 127.0.0.1...
 Connected to localhost.
 Escape character is '^]'.
@@ -64,16 +75,21 @@ EXEC
 +OK
 -ERR Operation against a key holding the wrong kind of value
 
-
MULTI returned a two elements bulk reply in witch one of this is a +OKcode and one is a -ERR reply. It's up to the client lib to find a sensibleway to provide the error to the user.
-
IMPORTANT: even when a command will raise an error, all the other commandsin the queue will be processed. Redis will NOT stop the processing ofcommands once an error is found.
-
Another example, again using the write protocol with telnet, shows howsyntax errors are reported ASAP instead:
+MULTI returned a two-element bulk reply in which one of these is a +OK
+code and one is a -ERR reply. It's up to the client lib to find a sensible
+way to provide the error to the user.

IMPORTANT: even when a command raises an error, all the other commands in the queue will be processed. Redis will NOT stop the processing of commands once an error is found.
+Another example, again using the write protocol with telnet, shows how +syntax errors are reported ASAP instead: +
 MULTI
 +OK
 INCR a b c
 -ERR wrong number of arguments for 'incr' command
 
-
This time due to the syntax error the "bad" INCR command is not queuedat all.
-

The DISCARD command

DISCARD can be used in order to abort a transaction. No command will beexecuted, and the state of the client is again the normal one, outsideof a transaction. Example using the Ruby client:
+This time due to the syntax error the "bad" INCR command is not queued
+at all.

The DISCARD command

DISCARD can be used in order to abort a transaction. No command will be +executed, and the state of the client is again the normal one, outside +
of a transaction. Example using the Ruby client:
 ?> r.set("foo",1)
 => true
 >> r.multi
@@ -84,9 +100,64 @@ INCR a b c
 => "OK"
 >> r.get("foo")
 => "1"
-

Return value

Multi bulk reply, specifically:

-The result of a MULTI/EXEC command is a multi bulk reply where every element is the return value of every command in the atomic transaction.
+

Check and Set (CAS) transactions using WATCH

WATCH is used in order to provide a CAS (Check and Set) behavior to +Redis Transactions.

WATCHed keys are monitored in order to detect changes against these keys. +If at least one watched key is modified before the EXEC call, the +whole transaction will abort, and EXEC will return a nil object +(A Null Multi Bulk reply) to notify that the transaction failed.

For example imagine we have the need to atomically increment the value +of a key by 1 (I know we have INCR, let's suppose we don't have it).

The first try may be the following: +
+val = GET mykey
+val = val + 1
+SET mykey $val
 
+This will work reliably only if we have a single client performing the operation in a given time. +If multiple clients will try to increment the key about at the same time +there will be a race condition. For instance client A and B will read the +old value, for instance, 10. The value will be incremented to 11 by both +the clients, and finally SET as the value of the key. So the final value +will be "11" instead of "12".

Thanks to WATCH we are able to model the problem very well: +
+WATCH mykey
+val = GET mykey
+val = val + 1
+MULTI
+SET mykey $val
+EXEC
+
+Using the above code, if there are race conditions and another client +modifies the value of mykey in the time between our call to WATCH and +our call to EXEC, the transaction will fail.

We'll have just to re-iterate the operation hoping this time we'll not get +a new race. This form of locking is called optimistic locking and is +a very powerful form of locking as in many problems there are multiple +clients accessing a much bigger number of keys, so it's very unlikely that +there are collisions: usually operations don't need to be performed +multiple times.

WATCH explained

So what is WATCH really about? It is a command that will make the EXEC +conditional: we are asking Redis to perform the transaction only if no +other client modified any of the WATCHed keys. Otherwise the transaction is not +entered at all. (Note that if you WATCH a volatile key and Redis expires the key after you WATCHed it, EXEC will still work. More.)

WATCH can be called multiple times. Simply all the WATCH calls will +have the effects to watch for changes starting from the call, up to the +moment EXEC is called.

When EXEC is called, either if it will fail or succeed, all keys are +UNWATCHed. Also when a client connection is closed, everything gets +UNWATCHed.

It is also possible to use the UNWATCH command (without arguments) in order +to flush all the watched keys. Sometimes this is useful as we +optimistically lock a few keys, since possibly we need to perform a transaction +to alter those keys, but after reading the current content of the keys +we don't want to proceed. When this happens we just call UNWATCH so that +the connection can already be used freely for new transactions.

WATCH used to implement ZPOP

A good example to illustrate how WATCH can be used to create new atomic +operations otherwise not supported by Redis is to implement ZPOP, that is +a command that pops the element with the lowest score from a sorted set +in an atomic way. This is the simplest implementation: +
+WATCH zset
+ele = ZRANGE zset 0 0
+MULTI
+ZREM zset ele
+EXEC
+
+If EXEC fails (returns a nil value) we just re-iterate the operation.

Return value

Multi bulk reply, specifically:

+The result of a MULTI/EXEC command is a multi bulk reply where every element is the return value of every command in the atomic transaction.
+
If a MULTI/EXEC transaction is aborted because of WATCH detected modified keys, a Null Multi Bulk reply is returned.
diff --git a/doc/README.html b/doc/README.html index f70fe83f..c71d6386 100644 --- a/doc/README.html +++ b/doc/README.html @@ -26,11 +26,42 @@
- = Introduction =

Redis is a database. To be specific, Redis is a database implementing a dictionary, where every key is associated with a value. For example I can set the key "surname_1992" to the string "Smith". -What makes Redis different from many other key-value stores, is that every single value has a type. The following types are supported:

-The type of a value determines what operations (called commands) are available for the value itself. -For example you can append elements to a list stored at the key "mylist" using the LPUSH or RPUSH command in O(1). Later you'll be able to get a range of elements with LRANGE or trim the list with LTRIM. Sets are very flexible too, it is possible to add and remove elements from Sets (unsorted collections of strings), and then ask for server-side intersection, union, difference of Sets. Each command is performed through server-side atomic operations. -Please refer to the Command Reference to see the full list of operations associated to these data types.

In other words, you can look at Redis as a data structures server. A Redis user is virtually provided with an interface to Abstract Data Types, saving her from the responsibility to implement concrete data structures and algorithms. Indeed both algorithms and data structures in Redis are properly choosed in order to obtain the best performance.

All data in memory, but saved on disk

Redis loads and mantains the whole dataset into memory, but the dataset is persistent, since at the same time it is saved on disk, so that when the server is restarted data can be loaded back in memory.

There are two kind of persistence supported: the first one is called snapshotting. In this mode Redis, from time to time, writes a dump on disk asynchronously. The dataset is loaded from the dump every time the server is (re)started.

Redis can be configured to save the dataset when a certain number of changes is reached and after a given number of seconds elapses. For example, you can configure Redis to save after 1000 changes and at most 60 seconds since the last save. You can specify any combination for these numbers.

Because data is written asynchronously, when a system crash occurs, the last few queries can get lost (that is acceptable in many applications but not in all). In order to make this a non issue Redis supports another, safer persistence mode, called Append Only File, where every command received altering the dataset (so not a read-only command, but a write command) is written on an append only file ASAP. This commands are replayed when the server is restarted in order to rebuild the dataset in memory.

Redis Append Only File supports a very handy feature: the server is able to safely rebuild the append only file in background in a non-blocking fashion when it gets too long. You can find more details in the Append Only File HOWTO.

Master-Slave replication made trivial

Whatever will be the persistence mode you'll use Redis supports master-slave replications if you want to stay really safe or if you need to scale to huge amounts of reads.

Redis Replication is trivial to setup. So trivial that all you need to do in order to configure a Redis server to be a slave of another one, with automatic synchronization if the link will go down and so forth, is the following config line: slaveof 192.168.1.100 6379. We provide a Replication Howto if you want to know more about this feature.

It's persistent but supports expires

Redis can be used as a memcached on steroids because is as fast as memcached but with a number of features more. Like memcached, Redis also supports setting timeouts to keys so that this key will be automatically removed when a given amount of time passes.

Beyond key-value databases

All these features allow to use Redis as the sole DB for your scalable application without the need of any relational database. We wrote a simple Twitter clone in PHP + Redis to show a real world example, the link points to an article explaining the design and internals in very simple words.

Multiple databases support

Redis supports multiple databases with commands to atomically move keys from one database to the other. By default DB 0 is selected for every new connection, but using the SELECT command it is possible to select a different database. The MOVE operation can move an item from one DB to another atomically. This can be used as a base for locking free algorithms together with the 'RANDOMKEY' commands.

Know more about Redis!

To really get a feeling about what Redis is and how it works please try reading A fifteen minutes introduction to Redis data types.

To know a bit more about how Redis works internally continue reading.

Redis Tutorial

(note, you can skip this section if you are only interested in "formal" doc.)

Later in this document you can find detailed information about Redis commands, + = Introduction =

Redis is an extremely fast and powerful key-value store database and server implemented in ANSI C. Redis offers many different ways to do one straightforward thing: store a value ("antirez") to a key ("redis"). While the format of keys must always be simple strings, the power is with the values, which support the following data types:

+Each value type has an associated list of commands which can operate on it, and the Redis Command Reference contains an up to date list of these commands, organized primarily by data type. The Redis source also includes a Redis command line interface which allows you to interact directly with the server, and is the means by which this introduction will provide examples. Once you walk through the Redis Quick Start Guide to get your instance of Redis running, you can follow along.

One of the most powerful aspects of Redis is the wide range of commands which are optimized to work with specific data value types and executed as atomic server-side operations. The List type is a great example - Redis implements O(1) operations such as LPUSH or RPUSH, which have accompanying LPOP and RPOP methods:

+redis> lpush programming_languages C
+OK
+redis> lpush programming_languages Ruby
+OK
+redis> rpush programming_languages Python
+OK
+redis> rpop programming_languages
+Python
+redis> lpop programming_languages
+Ruby
+
More complex operations are available for each data type as well. Continuing with lists, you can get a range of elements with LRANGE (O(start+n)) or trim the list with LTRIM (O(n)):

+redis> lpush cities NYC
+OK
+redis> lpush cities SF
+OK
+redis> lpush cities Tokyo
+OK
+redis> lpush cities London
+OK
+redis> lpush cities Paris
+OK
+redis> lrange cities 0 2
+1. Paris
+2. London
+3. Tokyo
+redis> ltrim cities 0 1
+OK
+redis> lpop cities
+Paris
+redis> lpop cities
+London
+redis> lpop cities
+(nil)
+
You can also add and remove elements from a set, and perform intersections, unions, and differences.

Redis can also be looked at as a data structures server. A Redis user is virtually provided with an interface to Abstract Data Types, saving them from the responsibility of implementing concrete data structures and algorithms -- indeed both algorithms and data structures in Redis are properly chosen in order to obtain the best performance.

All data in memory, but saved on disk

Redis loads and maintains the whole dataset in memory, but the dataset is persistent, since at the same time it is saved on disk, so that when the server is restarted data can be loaded back in memory.

There are two kinds of persistence supported: the first one is called snapshotting. In this mode Redis periodically writes to disk asynchronously. The dataset is loaded from the dump every time the server is (re)started.

Redis can be configured to save the dataset when a certain number of changes is reached and after a given number of seconds elapses. For example, you can configure Redis to save after 1000 changes and at most 60 seconds since the last save. You can specify any combination for these numbers.

Because data is written asynchronously, when a system crash occurs, the last few queries can get lost (that is acceptable in many applications but not in all). In order to make this a non issue Redis supports another, safer persistence mode, called Append Only File, where every command received altering the dataset (so not a read-only command, but a write command) is written on an append only file ASAP. This commands are replayed when the server is restarted in order to rebuild the dataset in memory.

Redis Append Only File supports a very handy feature: the server is able to safely rebuild the append only file in background in a non-blocking fashion when it gets too long. You can find more details in the Append Only File HOWTO.

Master-Slave replication made trivial

Whatever will be the persistence mode you'll use Redis supports master-slave replications if you want to stay really safe or if you need to scale to huge amounts of reads.

Redis Replication is trivial to setup. So trivial that all you need to do in order to configure a Redis server to be a slave of another one, with automatic synchronization if the link will go down and so forth, is the following config line: slaveof 192.168.1.100 6379. We provide a Replication Howto if you want to know more about this feature.

It's persistent but supports expires

Redis can be used as a memcached on steroids because it is as fast as memcached but has a number of additional features. Like memcached, Redis also supports setting timeouts on keys so that a key will be automatically removed when a given amount of time passes.

Beyond key-value databases

All these features allow to use Redis as the sole DB for your scalable application without the need of any relational database. We wrote a simple Twitter clone in PHP + Redis to show a real world example, the link points to an article explaining the design and internals in very simple words.

Multiple databases support

Redis supports multiple databases with commands to atomically move keys from one database to the other. By default DB 0 is selected for every new connection, but using the SELECT command it is possible to select a different database. The MOVE operation can move an item from one DB to another atomically. This can be used as a base for locking free algorithms together with the 'RANDOMKEY' commands.

Know more about Redis!

To really get a feeling about what Redis is and how it works please try reading A fifteen minutes introduction to Redis data types.

To know a bit more about how Redis works internally continue reading.

Redis Tutorial

(note, you can skip this section if you are only interested in "formal" doc.)

Later in this document you can find detailed information about Redis commands, the protocol specification, and so on. This kind of documentation is useful but... if you are new to Redis it is also BORING! The Redis protocol is designed so that is both pretty efficient to be parsed by computers, but simple enough @@ -40,7 +71,7 @@ feeling about it, and how it works.

To start just compile redis with 'm The server will start and log stuff on the standard output, if you want it to log more edit redis.conf, set the loglevel to debug, and restart it.

You can specify a configuration file as unique parameter:

./redis-server /etc/redis.conf
This is NOT required. The server will start even without a configuration file -using a default built-in configuration.

Now let's try to set a key to a given value:

+using a default built-in configuration.

Now let's try to set a key to a given value:

 $ telnet localhost 6379
 Trying 127.0.0.1...
 Connected to localhost.
@@ -59,17 +90,17 @@ the point of view of both the server and client but allows us to play with
 Redis with the telnet command easily.

The last line of the chat between server and client is "+OK". This means our key was added without problems. Actually SET can never fail but the "+OK" sent lets us know that the server received everything and -the command was actually executed.

Let's try to get the key content now:

+the command was actually executed.

Let's try to get the key content now:

 GET foo
 $3
 bar
 
Ok that's very similar to 'set', just the other way around. We sent "get foo", the server replied with a first line that is just the $ character followed by the number of bytes the value stored at key contained, followed by the actual -bytes. Again "\r\n" are appended both to the bytes count and the actual data. In Redis slang this is called a bulk reply.

What about requesting a non existing key?

+bytes. Again "\r\n" are appended both to the bytes count and the actual data. In Redis slang this is called a bulk reply.

What about requesting a non existing key?

 GET blabla
 $-1
-
When the key does not exist instead of the length, just the "$-1" string is sent. Since a -1 length of a bulk reply has no meaning it is used in order to specifiy a 'nil' value and distinguish it from a zero length value. Another way to check if a given key exists or not is indeed the EXISTS command:

+
When the key does not exist instead of the length, just the "$-1" string is sent. Since a -1 length of a bulk reply has no meaning it is used in order to specify a 'nil' value and distinguish it from a zero length value. Another way to check if a given key exists or not is indeed the EXISTS command:

 EXISTS nokey
 :0
 EXISTS foo
diff --git a/doc/SortedSetCommandsSidebar.html b/doc/SortedSetCommandsSidebar.html
index 2534beb2..c2e8fae0 100644
--- a/doc/SortedSetCommandsSidebar.html
+++ b/doc/SortedSetCommandsSidebar.html
@@ -26,7 +26,7 @@
                 
diff --git a/doc/SupportedLanguages.html b/doc/SupportedLanguages.html index 3b8156a2..d0d06aa2 100644 --- a/doc/SupportedLanguages.html +++ b/doc/SupportedLanguages.html @@ -28,7 +28,7 @@

Supported Languages (DRAFT)

Wondering if you can use Redis from your favorite language? Well here is the definitive guide to the available client libraries.

These libraries are intended to expose Redis commands, but you also have the option to use some higher level libraries that provide Object Hash Mappings, pretty much the same idea implemented by a classic ORM.

TODO

Features Support Matrix



The following matrix should give you a quick overview of the state of the different client libraries existing for each supported language.

The core command set is the one of Version 1.0, while Sharding and Pipelining are convenient client side features not tied to any Redis server version.

Version 1.1

Compatible client libraries are expected to implement the command sets specified in Version 1.0 plus:

  • String: MSET, MSETNX.
  • List: RPOPLPUSH.
  • Sorted Set (ZSET): ZADD, ZREM, ZRANGE, ZREVRANGE, ZRANGEBYSCORE, ZCARD, ZSCORE.
-

Version 1.0



Compatible client libraries are expected to implement the following command sets:

  • String: GET, SET, SETNX, DEL, EXISTS, INCR, DECR, MGET, INCRBY, DECRBY, GETSET, TYPE.
  • List: RPUSH, LPUSH, RPOP, LPOP, LLEN, LINDEX, LSET, LRANGE, LTRIM, LREM.
  • Set: SADD, SREM, SMOVE, SISMEMBER, SCARD, SPOP, SINTER, SINTERSTORE, SUNION, SUNIONSTORE, SDIFF, SDIFFSTORE, SMEMBERS.
  • Keyspace: KEYS, RANDOMKEY, RENAME, RENAMENX, DBSIZE, EXPIRE, TTL.
  • Databases: SELECT, MOVE, FLUSHDB, FLUSHALL.
  • Sort: SORT
  • Connection: AUTH, QUIT?. ???
  • Persistence: SAVE, BGSAVE, LASTSAVE, SHUTDOWN?. ???
  • Server: INFO, MONITOR? SLAVEOF? ???
Language Name Sharding Pipelining 1.1 1.0
ActionScript 3 as3redis No Yes Yes Yes
Clojure redis-clojure No No Partial Yes
Common Lisp CL-Redis No No No Yes
Erlang erldis No Looks like No Looks like
Go Go-Redis No Yes Yes Yes
Haskell haskell-redis No No No Yes
Java JDBC-Redis No No No Yes
Java JRedis No Yes Yes Yes
LUA redis-lua No No Yes Yes
Perl Redis Client No No No Yes
Perl AnyEvent::Redis No No No Yes
PHP Redis PHP Bindings No No No Yes
PHP phpredis (C) No No No Yes
PHP Predis Yes Yes Yes Yes
PHP Redisent Yes No No Yes
Python Python Client No No No Yes
Python py-redis No No Partial Yes
Python txredis No No No Yes
Ruby redis-rb Yes Yes Yes Yes
Scala scala-redis Yes No No Yes
TCL TCL No No Yes Yes
+

Version 1.0



Compatible client libraries are expected to implement the following command sets:

  • String: GET, SET, SETNX, DEL, EXISTS, INCR, DECR, MGET, INCRBY, DECRBY, GETSET, TYPE.
  • List: RPUSH, LPUSH, RPOP, LPOP, LLEN, LINDEX, LSET, LRANGE, LTRIM, LREM.
  • Set: SADD, SREM, SMOVE, SISMEMBER, SCARD, SPOP, SINTER, SINTERSTORE, SUNION, SUNIONSTORE, SDIFF, SDIFFSTORE, SMEMBERS.
  • Keyspace: KEYS, RANDOMKEY, RENAME, RENAMENX, DBSIZE, EXPIRE, TTL.
  • Databases: SELECT, MOVE, FLUSHDB, FLUSHALL.
  • Sort: SORT
  • Connection: AUTH, QUIT?. ???
  • Persistence: SAVE, BGSAVE, LASTSAVE, SHUTDOWN?. ???
  • Server: INFO, MONITOR? SLAVEOF? ???
Language Name Sharding Pipelining 1.1 1.0
ActionScript 3 as3redis No Yes Yes Yes
Clojure redis-clojure No No Partial Yes
Common Lisp CL-Redis No No No Yes
Erlang erldis No Looks like No Looks like
Go Go-Redis No Yes Yes Yes
Haskell haskell-redis No No No Yes
Java JDBC-Redis No No No Yes
Java JRedis No Yes Yes Yes
Java Jedis No Yes Yes Yes
LUA redis-lua No No Yes Yes
Perl Redis Client No No No Yes
Perl AnyEvent::Redis No No No Yes
PHP Redis PHP Bindings No No No Yes
PHP phpredis (C) No No No Yes
PHP Predis Yes Yes Yes Yes
PHP Redisent Yes No No Yes
Python Python Client No No No Yes
Python py-redis No No Partial Yes
Python txredis No No No Yes
Ruby redis-rb Yes Yes Yes Yes
Scala scala-redis Yes No No Yes
TCL TCL No No Yes Yes

Client Libraries Reference

as3 (ActionScript 3)

redis-clojure (Clojure)

CL-Redis (Common Lisp)

@@ -36,7 +36,8 @@

Go-Redis (Go)

haskell-redis (Haskell)

Java

JDBC-Redis

  • JDBC-Redis is a Java driver using the JDBC interface for the Redis database. This project doesn't aim for a complete implementation of the JDBC specification since Redis isn't a relational database, but should provide a familiar interface for Java developers to interact with Redis.
  • Repository: http://code.google.com/p/jdbc-redis/
-

JRedis

+

JRedis

+

Jedis

  • Jedis is a small and sane Redis client for Java. It aims to be easier to use by providing a more natural API. It currently supports the binary-safe protocol and pipelining. Sharding and connection pooling is on the way.
  • Author: Jonathan Leibiusky, @xetorthio.
  • Repository: http://github.com/xetorthio/jedis

redis-lua (Lua)

Perl

Perl Client

AnyEvent::Redis

diff --git a/doc/ZincrbyCommand.html b/doc/ZincrbyCommand.html index 7e6a8458..1f8fe294 100644 --- a/doc/ZincrbyCommand.html +++ b/doc/ZincrbyCommand.html @@ -30,8 +30,8 @@ Time complexity O(log(N)) with N being the number of elements in the sorted set
If member already exists in the sorted set, adds the increment to its score and updates the position of the element in the sorted set accordingly. If member does not already exist in the sorted set, it is added with increment as score (that is, as if the previous score was virtually zero). If key does not exist, a new sorted set with the specified member as sole member is created. If the key exists but does not hold a sorted set value, an error is returned.
The score value can be the string representation of a double precision floating point number. It's possible to provide a negative value to perform a decrement.
For an introduction to sorted sets check the Introduction to Redis data types page.
-

Return value

Integer reply, specifically:

-The score of the member after the increment is performed.
+

Return value

Bulk reply
+The new score (a double precision floating point number) represented as string.
 
diff --git a/doc/ZrangebyscoreCommand.html b/doc/ZrangebyscoreCommand.html index 583e9303..d9b310d4 100644 --- a/doc/ZrangebyscoreCommand.html +++ b/doc/ZrangebyscoreCommand.html @@ -16,7 +16,7 @@

ZrangebyscoreCommand

@@ -28,9 +28,11 @@
#sidebar SortedSetCommandsSidebar

ZRANGEBYSCORE _key_ _min_ _max_ `[`LIMIT _offset_ _count_`]` (Redis >

1.1) =

ZRANGEBYSCORE _key_ _min_ _max_ `[`LIMIT _offset_ _count_`]` `[`WITHSCORES`]` (Redis >

1.3.4) = +

ZCOUNT _key_ _min_ _max_

Time complexity: O(log(N))+O(M) with N being the number of elements in the sorted set and M the number of elements returned by the command, so if M is constant (for instance you always ask for the first ten elements with LIMIT) you can consider it O(log(N))
Return all the elements in the sorted set at key with a score between min and max (including elements with score equal to min or max).
The elements having the same score are returned sorted lexicographically as ASCII strings (this follows from a property of Redis sorted sets and does not involve further computation).
-
Using the optional LIMIT it's possible to get only a range of the matching elements in an SQL-like way. Note that if offset is large the command needs to traverse the list for offset elements and this adds up to the O(M) figure.

Exclusive intervals and infinity

+
Using the optional LIMIT it's possible to get only a range of the matching elements in an SQL-like way. Note that if offset is large the command needs to traverse the list for offset elements and this adds up to the O(M) figure.
+
The ZCOUNT command is similar to ZRANGEBYSCORE but instead of returning the actual elements in the specified interval, it just returns the number of matching elements.

Exclusive intervals and infinity

min and max can be -inf and +inf, so that you are not required to know what's the greatest or smallest element in order to take, for instance, elements "up to a given value".

Also, while the interval is closed (inclusive) by default, it's possible to specify open intervals by prefixing the score with a "(" character, so for instance:
 ZRANGEBYSCORE zset (1.3 5
@@ -40,7 +42,7 @@ Will return all the values with score > 1.3 and <= 5, while for ins
 ZRANGEBYSCORE zset (5 (10
 
Will return all the values with score > 5 and < 10 (5 and 10 excluded). -

Return value

Multi bulk reply, specifically a list of elements in the specified score range. +

Return value

ZRANGEBYSCORE returns a Multi bulk reply, specifically a list of elements in the specified score range.

ZCOUNT returns an Integer reply, specifically the number of elements matching the specified score range.

Examples

 redis> zadd zset 1 foo
@@ -56,6 +58,8 @@ redis> zrangebyscore zset -inf +inf
 2. "bar"
 3. "biz"
 4. "foz"
+redis> zcount zset 1 2
+(integer) 2
 redis> zrangebyscore zset 1 2
 1. "foo"
 2. "bar"
diff --git a/doc/ZunionCommand.html b/doc/ZunionCommand.html
index edb52a9c..cb5b844d 100644
--- a/doc/ZunionCommand.html
+++ b/doc/ZunionCommand.html
@@ -16,7 +16,7 @@
             

ZunionCommand

@@ -27,8 +27,9 @@
-

ZUNION / ZINTER _dstkey_ _N_ _k1_ ... _kN_ `[`WEIGHTS _w1_ ... _wN_`]` `[`AGGREGATE SUM|MIN|MAX`]` (Redis >

1.3.5) =

Time complexity: O(N) + O(M log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set
Creates a union or intersection of N sorted sets given by keys k1 through kN, and stores it at dstkey. It is mandatory to provide the number of input keys N, before passing the input keys and the other (optional) arguments.
-
As the terms imply, the ZINTER command requires an element to be present in each of the given inputs to be inserted in the result. The ZUNION command inserts all elements across all inputs.
+

ZUNIONSTORE _dstkey_ _N_ _k1_ ... _kN_ `[`WEIGHTS _w1_ ... _wN_`]` `[`AGGREGATE SUM|MIN|MAX`]` (Redis >

1.3.12) = +

ZINTERSTORE _dstkey_ _N_ _k1_ ... _kN_ `[`WEIGHTS _w1_ ... _wN_`]` `[`AGGREGATE SUM|MIN|MAX`]` (Redis >

1.3.12) =

Time complexity: O(N) + O(M log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set
Creates a union or intersection of N sorted sets given by keys k1 through kN, and stores it at dstkey. It is mandatory to provide the number of input keys N, before passing the input keys and the other (optional) arguments.
+
As the terms imply, the ZINTERSTORE command requires an element to be present in each of the given inputs to be inserted in the result. The ZUNIONSTORE command inserts all elements across all inputs.
Using the WEIGHTS option, it is possible to add weight to each input sorted set. This means that the score of each element in the sorted set is first multiplied by this weight before being passed to the aggregation. When this option is not given, all weights default to 1.
With the AGGREGATE option, it's possible to specify how the results of the union or intersection are aggregated. This option defaults to SUM, where the score of an element is summed across the inputs where it exists. When this option is set to be either MIN or MAX, the resulting set will contain the minimum or maximum score of an element across the inputs where it exists.

Return value

Integer reply, specifically the number of elements in the sorted set at dstkey. diff --git a/doc/ZunionstoreCommand.html b/doc/ZunionstoreCommand.html index a9f74326..862c38bb 100644 --- a/doc/ZunionstoreCommand.html +++ b/doc/ZunionstoreCommand.html @@ -16,7 +16,7 @@

ZunionstoreCommand

@@ -27,8 +27,10 @@
-

ZUNION / ZINTER _dstkey_ _N_ _k1_ ... _kN_ `[`WEIGHTS _w1_ ... _wN_`]` `[`AGGREGATE SUM|MIN|MAX`]` (Redis >

1.3.5) =

Time complexity: O(N) + O(M log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set
Creates a union or intersection of N sorted sets given by keys k1 through kN, and stores it at dstkey. It is mandatory to provide the number of input keys N, before passing the input keys and the other (optional) arguments.
-
As the terms imply, the ZINTER command requires an element to be present in each of the given inputs to be inserted in the result. The ZUNION command inserts all elements across all inputs.
+

ZUNIONSTORE _dstkey_ _N_ _k1_ ... _kN_ `[`WEIGHTS _w1_ ... _wN_`]` `[`AGGREGATE SUM|MIN|MAX`]` (Redis >

1.3.12) = +

ZINTERSTORE _dstkey_ _N_ _k1_ ... _kN_ `[`WEIGHTS _w1_ ... _wN_`]` `[`AGGREGATE SUM|MIN|MAX`]` (Redis >

1.3.12) = +Time complexity: O(N) + O(M log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set
Creates a union or intersection of N sorted sets given by keys k1 through kN, and stores it at dstkey. It is mandatory to provide the number of input keys N, before passing the input keys and the other (optional) arguments.
+
As the terms imply, the ZINTERSTORE command requires an element to be present in each of the given inputs to be inserted in the result. The ZUNIONSTORE command inserts all elements across all inputs.
Using the WEIGHTS option, it is possible to add weight to each input sorted set. This means that the score of each element in the sorted set is first multiplied by this weight before being passed to the aggregation. When this option is not given, all weights default to 1.
With the AGGREGATE option, it's possible to specify how the results of the union or intersection are aggregated. This option defaults to SUM, where the score of an element is summed across the inputs where it exists. When this option is set to be either MIN or MAX, the resulting set will contain the minimum or maximum score of an element across the inputs where it exists.

Return value

Integer reply, specifically the number of elements in the sorted set at dstkey. diff --git a/doc/index.html b/doc/index.html index 2cf5d9a8..1c72b230 100644 --- a/doc/index.html +++ b/doc/index.html @@ -26,12 +26,12 @@
- = Redis Documentation =

Russian Translation. Hello! The following are pointers to different parts of the Redis Documentation.

-

HOWTOs about selected features

  • The Redis Replication HOWTO is what you need to read in order to understand how Redis master <-> slave replication works.
  • The Append Only File HOWTO explains how the alternative Redis durability mode works. AOF is an alternative to snapshotting on disk from time to time (the default).
  • Virutal Memory User Guide. A simple to understand guide about using and configuring the Redis Virtual Memory.
+ = Redis Documentation =

Russian Translation. Hello! The following are pointers to different parts of the Redis Documentation.

+

HOWTOs about selected features

  • The Redis Replication HOWTO is what you need to read in order to understand how Redis master <-> slave replication works.
  • The Append Only File HOWTO explains how the alternative Redis durability mode works. AOF is an alternative to snapshotting on disk from time to time (the default).
  • Virtual Memory User Guide. A simple to understand guide about using and configuring the Redis Virtual Memory.

Hacking

  • The Protocol Specification is all you need in order to implement a Redis client library for a missing language. PHP, Python, Ruby and Erlang are already supported.
  • Look at Redis Internals if you are interested in the implementation details of the Redis server.
-

Videos

+

Videos

diff --git a/src/solarisfixes.h b/src/solarisfixes.h index ce8e7b6f..3cb091d4 100644 --- a/src/solarisfixes.h +++ b/src/solarisfixes.h @@ -1,6 +1,7 @@ /* Solaris specific fixes */ #if defined(__GNUC__) +#include #undef isnan #define isnan(x) \ __extension__({ __typeof (x) __x_a = (x); \ From 169d2ef1e0259945e667a33db7944947a6b047a0 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 6 Sep 2010 23:12:41 +0200 Subject: [PATCH 104/139] Fix updating the prevlen field of consecutive entries In the condition where the prevlen field of the next entry on insert and delete operations needs more bytes to be properly encoded, the next entry also needs to be updated with a new prevlen. This patch makes sure that this effect cascades throughout the ziplist. --- src/ziplist.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 208 insertions(+), 13 deletions(-) diff --git a/src/ziplist.c b/src/ziplist.c index a6383517..f1069e41 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -226,6 +226,14 @@ static unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) { } } +/* Encode the length of the previous entry and write it to "p". This only + * uses the larger encoding (required in __ziplistCascadeUpdate). */ +static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { + if (p == NULL) return; + p[0] = ZIP_BIGLEN; + memcpy(p+1,&len,sizeof(len)); +} + /* Return the difference in number of bytes needed to store the new length * "len" on the entry pointed to by "p". */ static int zipPrevLenByteDiff(unsigned char *p, unsigned int len) { @@ -344,11 +352,86 @@ static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) { return zl; } +/* When an entry is inserted, we need to set the prevlen field of the next + * entry to equal the length of the inserted entry. It can occur that this + * length cannot be encoded in 1 byte and the next entry needs to be grow + * a bit larger to hold the 5-byte encoded prevlen. 
This can be done for free, + * because this only happens when an entry is already being inserted (which + * causes a realloc and memmove). However, encoding the prevlen may require + * that this entry is grown as well. This effect may cascade throughout + * the ziplist when there are consecutive entries with a size close to + * ZIP_BIGLEN, so we need to check that the prevlen can be encoded in every + * consecutive entry. + * + * Note that this effect can also happen in reverse, where the bytes required + * to encode the prevlen field can shrink. This effect is deliberately ignored, + * because it can cause a "flapping" effect where a chain prevlen fields is + * first grown and then shrunk again after consecutive inserts. Rather, the + * field is allowed to stay larger than necessary, because a large prevlen + * field implies the ziplist is holding large entries anyway. + * + * The pointer "p" points to the first entry that does NOT need to be + * updated, i.e. consecutive fields MAY need an update. */ +static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) { + unsigned int curlen = ZIPLIST_BYTES(zl), rawlen, rawlensize; + unsigned int offset, noffset, extra; + unsigned char *np; + zlentry cur, next; + + while (p[0] != ZIP_END) { + cur = zipEntry(p); + rawlen = cur.headersize + cur.len; + rawlensize = zipPrevEncodeLength(NULL,rawlen); + + /* Abort if there is no next entry. */ + if (p[rawlen] == ZIP_END) break; + next = zipEntry(p+rawlen); + + /* Abort when "prevlen" has not changed. */ + if (next.prevrawlen == rawlen) break; + + if (next.prevrawlensize < rawlensize) { + /* The "prevlen" field of "next" needs more bytes to hold + * the raw length of "cur". */ + offset = p-zl; + extra = rawlensize-next.prevrawlensize; + zl = ziplistResize(zl,curlen+extra); + ZIPLIST_TAIL_OFFSET(zl) += extra; + p = zl+offset; + + /* Move the tail to the back. 
*/ + np = p+rawlen; + noffset = np-zl; + memmove(np+rawlensize, + np+next.prevrawlensize, + curlen-noffset-next.prevrawlensize-1); + zipPrevEncodeLength(np,rawlen); + + /* Advance the cursor */ + p += rawlen; + } else { + if (next.prevrawlensize > rawlensize) { + /* This would result in shrinking, which we want to avoid. + * So, set "rawlen" in the available bytes. */ + zipPrevEncodeLengthForceLarge(p+rawlen,rawlen); + } else { + zipPrevEncodeLength(p+rawlen,rawlen); + } + + /* Stop here, as the raw length of "next" has not changed. */ + break; + } + } + return zl; +} + /* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) { unsigned int i, totlen, deleted = 0; - int nextdiff = 0; - zlentry first = zipEntry(p); + int offset, nextdiff = 0; + zlentry first, tail; + + first = zipEntry(p); for (i = 0; p[0] != ZIP_END && i < num; i++) { p += zipRawEntryLength(p); deleted++; @@ -365,7 +448,14 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig zipPrevEncodeLength(p-nextdiff,first.prevrawlen); /* Update offset for tail */ - ZIPLIST_TAIL_OFFSET(zl) -= totlen+nextdiff; + ZIPLIST_TAIL_OFFSET(zl) -= totlen; + + /* When the tail contains more than one entry, we need to take + * "nextdiff" in account as well. Otherwise, a change in the + * size of prevlen doesn't have an effect on the *tail* offset. 
*/ + tail = zipEntry(p); + if (p[tail.headersize+tail.len] != ZIP_END) + ZIPLIST_TAIL_OFFSET(zl) += nextdiff; /* Move tail to the front of the ziplist */ memmove(first.p,p-nextdiff,ZIPLIST_BYTES(zl)-(p-zl)-1+nextdiff); @@ -375,8 +465,15 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig } /* Resize and update length */ + offset = first.p-zl; zl = ziplistResize(zl, ZIPLIST_BYTES(zl)-totlen+nextdiff); ZIPLIST_INCR_LENGTH(zl,-deleted); + p = zl+offset; + + /* When nextdiff != 0, the raw length of the next entry has changed, so + * we need to cascade the update throughout the ziplist */ + if (nextdiff != 0) + zl = __ziplistCascadeUpdate(zl,p); } return zl; } @@ -385,19 +482,18 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) { unsigned int curlen = ZIPLIST_BYTES(zl), reqlen, prevlen = 0; unsigned int offset, nextdiff = 0; - unsigned char *tail; unsigned char encoding = 0; long long value; - zlentry entry; + zlentry entry, tail; /* Find out prevlen for the entry that is inserted. */ if (p[0] != ZIP_END) { entry = zipEntry(p); prevlen = entry.prevrawlen; } else { - tail = ZIPLIST_ENTRY_TAIL(zl); - if (tail[0] != ZIP_END) { - prevlen = zipRawEntryLength(tail); + unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl); + if (ptail[0] != ZIP_END) { + prevlen = zipRawEntryLength(ptail); } } @@ -429,15 +525,32 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig if (p[0] != ZIP_END) { /* Subtract one because of the ZIP_END bytes */ memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff); + /* Encode this entry's raw length in the next entry. 
*/ zipPrevEncodeLength(p+reqlen,reqlen); + /* Update offset for tail */ - ZIPLIST_TAIL_OFFSET(zl) += reqlen+nextdiff; + ZIPLIST_TAIL_OFFSET(zl) += reqlen; + + /* When the tail contains more than one entry, we need to take + * "nextdiff" in account as well. Otherwise, a change in the + * size of prevlen doesn't have an effect on the *tail* offset. */ + tail = zipEntry(p+reqlen); + if (p[reqlen+tail.headersize+tail.len] != ZIP_END) + ZIPLIST_TAIL_OFFSET(zl) += nextdiff; } else { /* This element will be the new tail. */ ZIPLIST_TAIL_OFFSET(zl) = p-zl; } + /* When nextdiff != 0, the raw length of the next entry has changed, so + * we need to cascade the update throughout the ziplist */ + if (nextdiff != 0) { + offset = p-zl; + zl = __ziplistCascadeUpdate(zl,p+reqlen); + p = zl+offset; + } + /* Write the entry */ p += zipPrevEncodeLength(p,prevlen); p += zipEncodeLength(p,encoding,slen); @@ -510,6 +623,7 @@ unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p) { return NULL; } else { entry = zipEntry(p); + assert(entry.prevrawlen > 0); return p-entry.prevrawlen; } } @@ -615,21 +729,52 @@ unsigned int ziplistSize(unsigned char *zl) { void ziplistRepr(unsigned char *zl) { unsigned char *p; + int index = 0; zlentry entry; - printf("{total bytes %d} {length %u}\n",ZIPLIST_BYTES(zl), ZIPLIST_LENGTH(zl)); + printf( + "{total bytes %d} " + "{length %u}\n" + "{tail offset %u}\n", + ZIPLIST_BYTES(zl), + ZIPLIST_LENGTH(zl), + ZIPLIST_TAIL_OFFSET(zl)); p = ZIPLIST_ENTRY_HEAD(zl); while(*p != ZIP_END) { entry = zipEntry(p); - printf("{offset %ld, header %u, payload %u} ",p-zl,entry.headersize,entry.len); + printf( + "{" + "addr 0x%08lx, " + "index %2d, " + "offset %5ld, " + "rl: %5u, " + "hs %2u, " + "pl: %5u, " + "pls: %2u, " + "payload %5u" + "} ", + (long unsigned int)p, + index, + p-zl, + entry.headersize+entry.len, + entry.headersize, + entry.prevrawlen, + entry.prevrawlensize, + entry.len); p += entry.headersize; if (ZIP_IS_STR(entry.encoding)) { - 
fwrite(p,entry.len,1,stdout); + if (entry.len > 40) { + fwrite(p,40,1,stdout); + printf("..."); + } else { + fwrite(p,entry.len,1,stdout); + } } else { printf("%lld", (long long) zipLoadInteger(p,entry.encoding)); } printf("\n"); p += entry.len; + index++; } printf("{end}\n\n"); } @@ -1019,7 +1164,57 @@ int main(int argc, char **argv) { printf("ERROR: \"1025\"\n"); return 1; } - printf("SUCCESS\n"); + printf("SUCCESS\n\n"); + } + + printf("Stress with random payloads of different encoding:\n"); + { + int i, idx, where, len; + long long v; + unsigned char *p; + char buf[0x4041]; /* max length of generated string */ + zl = ziplistNew(); + for (i = 0; i < 100000; i++) { + where = (rand() & 1) ? ZIPLIST_HEAD : ZIPLIST_TAIL; + if (rand() & 1) { + /* equally likely create a 16, 32 or 64 bit int */ + v = (rand() & INT16_MAX) + ((1ll << 32) >> ((rand() % 3)*16)); + v *= 2*(rand() & 1)-1; /* randomly flip sign */ + sprintf(buf, "%lld", v); + zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), where); + } else { + /* equally likely generate 6, 14 or >14 bit length */ + v = rand() & 0x3f; + v += 0x4000 >> ((rand() % 3)*8); + memset(buf, 'x', v); + zl = ziplistPush(zl, (unsigned char*)buf, v, where); + } + + /* delete a random element */ + if ((len = ziplistLen(zl)) >= 10) { + idx = rand() % len; + // printf("Delete index %d\n", idx); + // ziplistRepr(zl); + ziplistDeleteRange(zl, idx, 1); + // ziplistRepr(zl); + len--; + } + + /* iterate from front to back */ + idx = 0; + p = ziplistIndex(zl, 0); + while((p = ziplistNext(zl,p))) + idx++; + assert(len == idx+1); + + /* iterate from back to front */ + idx = 0; + p = ziplistIndex(zl, -1); + while((p = ziplistPrev(zl,p))) + idx++; + assert(len == idx+1); + } + printf("SUCCESS\n\n"); } printf("Stress with variable ziplist size:\n"); From 84403fe7c1ab582c1fff4ddb5d933ba1a5f61759 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 7 Sep 2010 00:08:42 +0200 Subject: [PATCH 105/139] Allow a random seed argument for the 
ziplist test binary --- src/ziplist.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ziplist.c b/src/ziplist.c index f1069e41..5254423d 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -870,6 +870,10 @@ int main(int argc, char **argv) { unsigned int elen; long long value; + /* If an argument is given, use it as the random seed. */ + if (argc == 2) + srand(atoi(argv[1])); + zl = createIntList(); ziplistRepr(zl); From 106bd87a3ca7b90e1e6fb764398bbefccee3e4fa Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 6 Sep 2010 11:27:22 +0200 Subject: [PATCH 106/139] Fix bug where the client is not present in server.clients when free'ing it When creating the readable event results in an error (this happens when the server hits OS limits), the client was not added to the list of clients when freeClient was called. This results in an assertion error. It is better to check this condition first and free the client immediately when this condition occurs. Port of 00a90feb. --- src/networking.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/networking.c b/src/networking.c index a39be7c4..104444f0 100644 --- a/src/networking.c +++ b/src/networking.c @@ -17,6 +17,14 @@ redisClient *createClient(int fd) { anetNonBlock(NULL,fd); anetTcpNoDelay(NULL,fd); if (!c) return NULL; + if (aeCreateFileEvent(server.el,fd,AE_READABLE, + readQueryFromClient, c) == AE_ERR) + { + close(fd); + zfree(c); + return NULL; + } + selectDb(c,0); c->fd = fd; c->querybuf = sdsempty(); @@ -43,11 +51,6 @@ redisClient *createClient(int fd) { c->pubsub_patterns = listCreate(); listSetFreeMethod(c->pubsub_patterns,decrRefCount); listSetMatchMethod(c->pubsub_patterns,listMatchObjects); - if (aeCreateFileEvent(server.el, c->fd, AE_READABLE, - readQueryFromClient, c) == AE_ERR) { - freeClient(c); - return NULL; - } listAddNodeTail(server.clients,c); initClientMultiState(c); return c; From efc5d4cc0dd285b45061d61d7a717777aa8bc7a0 Mon Sep 17 00:00:00 2001 From: 
Pieter Noordhuis Date: Tue, 7 Sep 2010 11:49:33 +0200 Subject: [PATCH 107/139] Fix test that sometimes returned the swapped object instead of encoding --- tests/unit/type/hash.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl index ef49a27d..74f6d058 100644 --- a/tests/unit/type/hash.tcl +++ b/tests/unit/type/hash.tcl @@ -15,8 +15,8 @@ start_server {tags {"hash"}} { } {8} test {Is the small hash encoded with a zipmap?} { - r debug object smallhash - } {*zipmap*} + assert_encoding zipmap smallhash + } test {HSET/HLEN - Big hash creation} { array set bighash {} From 7f00cd226438d3cd91238974346cee624d8920e1 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 8 Sep 2010 13:26:16 +0200 Subject: [PATCH 108/139] Fixed a race condition in VM happening when a key was deleted while there was a client waiting for this key to be resumed from swap to memory. The client would hang forever. --- src/db.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/db.c b/src/db.c index f380bf6e..ca520c82 100644 --- a/src/db.c +++ b/src/db.c @@ -123,6 +123,11 @@ robj *dbRandomKey(redisDb *db) { /* Delete a key, value, and associated expiration entry if any, from the DB */ int dbDelete(redisDb *db, robj *key) { + /* If VM is enabled make sure to awake waiting clients for this key: + * deleting the key will kill the I/O thread bringing the key from swap + * to memory, so the client will never be notified and unblocked if we + * don't do it now. */ + handleClientsBlockedOnSwappedKey(db,key); /* Deleting an entry from the expires dict will not free the sds of * the key, because it is shared with the main dictionary. 
*/ if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr); From 155fb4b45e9da66802aed6226189941459fa014f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 8 Sep 2010 13:45:51 +0200 Subject: [PATCH 109/139] latest fix reverted, there is some problem reported by the CI test --- src/db.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index ca520c82..c77c7614 100644 --- a/src/db.c +++ b/src/db.c @@ -127,7 +127,7 @@ int dbDelete(redisDb *db, robj *key) { * deleting the key will kill the I/O thread bringing the key from swap * to memory, so the client will never be notified and unblocked if we * don't do it now. */ - handleClientsBlockedOnSwappedKey(db,key); + /* handleClientsBlockedOnSwappedKey(db,key); */ /* Deleting an entry from the expires dict will not free the sds of * the key, because it is shared with the main dictionary. */ if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr); From da14590bd9ecef2efca89a82677d390cc36afafd Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 8 Sep 2010 13:47:28 +0200 Subject: [PATCH 110/139] Fix re-enabled again, I forgot to check if VM was enabled before calling handleClientsBlockedOnSwappedKey() --- src/db.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index c77c7614..afca3cdd 100644 --- a/src/db.c +++ b/src/db.c @@ -127,7 +127,7 @@ int dbDelete(redisDb *db, robj *key) { * deleting the key will kill the I/O thread bringing the key from swap * to memory, so the client will never be notified and unblocked if we * don't do it now. */ - /* handleClientsBlockedOnSwappedKey(db,key); */ + if (server.vm_enabled) handleClientsBlockedOnSwappedKey(db,key); /* Deleting an entry from the expires dict will not free the sds of * the key, because it is shared with the main dictionary. 
*/ if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr); From 7f9a4db3c013b744e11dc7f58026009392b051de Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 9 Sep 2010 10:24:56 +0200 Subject: [PATCH 111/139] Fix for the init script provided with Redis, thanks to Rowan. This fixes issue 316 --- utils/redis_init_script | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/utils/redis_init_script b/utils/redis_init_script index 35b906fc..b1c56002 100755 --- a/utils/redis_init_script +++ b/utils/redis_init_script @@ -21,15 +21,14 @@ case "$1" in then echo -n "$PIDFILE does not exist, process is not running\n" else + PID=$(cat $PIDFILE) echo -n "Stopping ...\n" - echo -n "Sending SHUTDOWN\r\n" | nc localhost $REDISPORT & - PID=$(cat $PIDFILE) + echo -n "SHUTDOWN\r\n" | nc localhost $REDISPORT & while [ -x /proc/${PIDFILE} ] do echo "Waiting for Redis to shutdown ..." sleep 1 done - rm $PIDFILE echo "Redis stopped" fi ;; From bc63407be6c65d779ba218ad79a4889f86e37fd5 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 9 Sep 2010 16:38:10 +0200 Subject: [PATCH 112/139] redis-cli does no longer try to auto detect if it is used inside a pipe. To read last argument from stdandard input there is to use the -x option. This will make it playing better inside cron scripts and in general when stdin is hacked. --- src/redis-cli.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 761c025e..0e6edbe7 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -62,6 +62,7 @@ static struct config { int pubsub_mode; int raw_output; /* output mode per command */ int tty; /* flag for default output format */ + int stdinarg; /* get last arg from stdin. 
(-x option) */ char mb_sep; char *auth; char *historyfile; @@ -337,6 +338,8 @@ static int parseOptions(int argc, char **argv) { i++; } else if (!strcmp(argv[i],"-h") && lastarg) { usage(); + } else if (!strcmp(argv[i],"-x")) { + config.stdinarg = 1; } else if (!strcmp(argv[i],"-p") && !lastarg) { config.hostport = atoi(argv[i+1]); i++; @@ -389,9 +392,8 @@ static sds readArgFromStdin(void) { static void usage() { fprintf(stderr, "usage: redis-cli [-iv] [-h host] [-p port] [-a authpw] [-r repeat_times] [-n db_num] cmd arg1 arg2 arg3 ... argN\n"); - fprintf(stderr, "usage: echo \"argN\" | redis-cli [-h host] [-p port] [-a authpw] [-r repeat_times] [-n db_num] cmd arg1 arg2 ... arg(N-1)\n"); - fprintf(stderr, "\nIf a pipe from standard input is detected this data is used as last argument.\n\n"); - fprintf(stderr, "example: cat /etc/passwd | redis-cli set my_passwd\n"); + fprintf(stderr, "usage: echo \"argN\" | redis-cli -x [options] cmd arg1 arg2 ... arg(N-1)\n\n"); + fprintf(stderr, "example: cat /etc/passwd | redis-cli -x set my_passwd\n"); fprintf(stderr, "example: redis-cli get my_passwd\n"); fprintf(stderr, "example: redis-cli -r 100 lpush mylist x\n"); fprintf(stderr, "\nRun in interactive mode: redis-cli -i or just don't pass any command\n"); @@ -456,9 +458,7 @@ static void repl() { static int noninteractive(int argc, char **argv) { int retval = 0; - struct stat s; - fstat(fileno(stdin), &s); - if (S_ISFIFO(s.st_mode) || S_ISREG(s.st_mode)) { /* pipe, regular file */ + if (config.stdinarg) { argv = zrealloc(argv, (argc+1)*sizeof(char*)); argv[argc] = readArgFromStdin(); retval = cliSendCommand(argc+1, argv, config.repeat); @@ -481,6 +481,7 @@ int main(int argc, char **argv) { config.monitor_mode = 0; config.pubsub_mode = 0; config.raw_output = 0; + config.stdinarg = 0; config.auth = NULL; config.historyfile = NULL; config.tty = isatty(fileno(stdout)) || (getenv("FAKETTY") != NULL); From 3c23ee1ba265b6e420579dcbf94bd25ece23e9fe Mon Sep 17 00:00:00 2001 From: 
Pieter Noordhuis Date: Mon, 13 Sep 2010 16:59:46 +0200 Subject: [PATCH 113/139] Fix another test that sometimes returned the swapped object instead of encoding --- tests/unit/type/hash.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl index 74f6d058..2c0bd534 100644 --- a/tests/unit/type/hash.tcl +++ b/tests/unit/type/hash.tcl @@ -34,8 +34,8 @@ start_server {tags {"hash"}} { } {1024} test {Is the big hash encoded with a zipmap?} { - r debug object bighash - } {*hashtable*} + assert_encoding hashtable bighash + } test {HGET against the small hash} { set err {} From e13865033d06ea8d499cc01ab457abedd3ba6d05 Mon Sep 17 00:00:00 2001 From: Pedro Melo Date: Mon, 13 Sep 2010 16:09:11 +0100 Subject: [PATCH 114/139] Rename INSTALL_TOP to PREFIX; update documentation Signed-off-by: Pedro Melo --- INSTALL | 13 ++++++++++++- README | 3 ++- src/Makefile | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/INSTALL b/INSTALL index 7c6635aa..9bce0b2e 100644 --- a/INSTALL +++ b/INSTALL @@ -3,7 +3,18 @@ To compile Redis, do the following: cd src; make The compilation will produce a redis-server binary. -Copy this file where you want. + +To install Redis, use + + make install + +and all the binaries will be installed on /usr/local/bin. + +Alternatively: + + make PREFIX=/some/other/directory + +to have the binaries in /some/other/directory/bin. Run the server using the following command line: diff --git a/README b/README index 5eeabf74..1f0a1fe6 100644 --- a/README +++ b/README @@ -16,7 +16,8 @@ It is as simple as: Redis is just a single binary, but if you want to install it you can use the "make install" target that will copy the binary in /usr/local/bin -for default. +for default. You can also use "make PREFIX=/some/other/directory install" +if you wish to use a different destination. 
You can run a 32 bit Redis binary using: diff --git a/src/Makefile b/src/Makefile index 38007e8d..2dc3065f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -15,7 +15,7 @@ endif CCOPT= $(CFLAGS) $(CCLINK) $(ARCH) $(PROF) DEBUG?= -g -rdynamic -ggdb -INSTALL_TOP= /usr/local +PREFIX= /usr/local INSTALL_BIN= $(INSTALL_TOP)/bin INSTALL= cp -p From e984050fb97939964daf6fb5e28cd7fc081c2c70 Mon Sep 17 00:00:00 2001 From: Pedro Melo Date: Mon, 13 Sep 2010 16:11:55 +0100 Subject: [PATCH 115/139] Make sure INSTALL_TOP exists before we install to it Signed-off-by: Pedro Melo --- src/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Makefile b/src/Makefile index 2dc3065f..05a69cf8 100644 --- a/src/Makefile +++ b/src/Makefile @@ -153,6 +153,7 @@ noopt: make PROF="-pg" ARCH="-arch i386" install: all + mkdir -p $(INSTALL_BIN) $(INSTALL) $(PRGNAME) $(INSTALL_BIN) $(INSTALL) $(BENCHPRGNAME) $(INSTALL_BIN) $(INSTALL) $(CLIPRGNAME) $(INSTALL_BIN) From 0997b4119d79da25b6ca43bc5aec0e3e03f0e64d Mon Sep 17 00:00:00 2001 From: Pedro Melo Date: Mon, 13 Sep 2010 16:50:57 +0100 Subject: [PATCH 116/139] Fixed missed use of INSTALL_TOP Thanks to sylr@github Signed-off-by: Pedro Melo --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 05a69cf8..e1e989c6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -16,7 +16,7 @@ CCOPT= $(CFLAGS) $(CCLINK) $(ARCH) $(PROF) DEBUG?= -g -rdynamic -ggdb PREFIX= /usr/local -INSTALL_BIN= $(INSTALL_TOP)/bin +INSTALL_BIN= $(PREFIX)/bin INSTALL= cp -p OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o vm.o pubsub.o multi.o debug.o sort.o intset.o From 1d18f50458d2d31122cd0309f554b2443d076f7c Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 14 Sep 2010 15:09:37 +0200 Subject: [PATCH 117/139] Advertise the existence 
of redis-check dump --fix when logging an error about corrupted AOF file --- src/aof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aof.c b/src/aof.c index 8f2dc96f..ce4cc191 100644 --- a/src/aof.c +++ b/src/aof.c @@ -307,7 +307,7 @@ readerr: } exit(1); fmterr: - redisLog(REDIS_WARNING,"Bad file format reading the append only file"); + redisLog(REDIS_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-dump --fix <filename>"); exit(1); } From 412e457c278395a20dbc985f85a2d3d5316a2919 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 14 Sep 2010 15:18:18 +0200 Subject: [PATCH 118/139] fixed typo in the latest commit --- src/aof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aof.c b/src/aof.c index ce4cc191..25febb91 100644 --- a/src/aof.c +++ b/src/aof.c @@ -307,7 +307,7 @@ readerr: } exit(1); fmterr: - redisLog(REDIS_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-dump --fix <filename>"); + redisLog(REDIS_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-aof --fix <filename>"); exit(1); } From 3856f1475936e58d3caebbb3df317421b263f2e6 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 Sep 2010 14:09:41 +0200 Subject: [PATCH 119/139] This should fix Issue 332: when there is a background process saving we still allow the hash tables to grow, but only when a critical threshold is reached. Formerly we prevented the resize at all triggering pathological O(N) behavior.
Also there is a fix for the statistics in INFO about the number of keys expired --- src/dict.c | 28 +++++++++++++++++++++------- src/redis.c | 1 + 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/dict.c b/src/dict.c index 2d1e752b..a1060d45 100644 --- a/src/dict.c +++ b/src/dict.c @@ -49,8 +49,13 @@ /* Using dictEnableResize() / dictDisableResize() we make possible to * enable/disable resizing of the hash table as needed. This is very important * for Redis, as we use copy-on-write and don't want to move too much memory - * around when there is a child performing saving operations. */ + * around when there is a child performing saving operations. + * + * Note that even when dict_can_resize is set to 0, not all resizes are + * prevented: an hash table is still allowed to grow if the ratio between + * the number of elements and the buckets > dict_force_resize_ratio. */ static int dict_can_resize = 1; +static unsigned int dict_force_resize_ratio = 5; /* -------------------------- private prototypes ---------------------------- */ @@ -125,7 +130,7 @@ int _dictInit(dict *d, dictType *type, } /* Resize the table to the minimal size that contains all the elements, - * but with the invariant of a USER/BUCKETS ration near to <= 1 */ + * but with the invariant of a USER/BUCKETS ratio near to <= 1 */ int dictResize(dict *d) { int minimal; @@ -493,14 +498,23 @@ dictEntry *dictGetRandomKey(dict *d) /* Expand the hash table if needed */ static int _dictExpandIfNeeded(dict *d) { - /* If the hash table is empty expand it to the intial size, - * if the table is "full" dobule its size. */ + /* Incremental rehashing already in progress. Return. */ if (dictIsRehashing(d)) return DICT_OK; - if (d->ht[0].size == 0) - return dictExpand(d, DICT_HT_INITIAL_SIZE); - if (d->ht[0].used >= d->ht[0].size && dict_can_resize) + + /* If the hash table is empty expand it to the intial size. 
*/ + if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE); + + /* If we reached the 1:1 ratio, and we are allowed to resize the hash + * table (global setting) or we should avoid it but the ratio between + * elements/buckets is over the "safe" threshold, we resize doubling + * the number of buckets. */ + if (d->ht[0].used >= d->ht[0].size && + (dict_can_resize || + d->ht[0].used/d->ht[0].size > dict_force_resize_ratio)) + { return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ? d->ht[0].size : d->ht[0].used)*2); + } return DICT_OK; } diff --git a/src/redis.c b/src/redis.c index 8206b5d3..b6b42521 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1343,6 +1343,7 @@ void freeMemoryIfNeeded(void) { } keyobj = createStringObject(minkey,sdslen(minkey)); dbDelete(server.db+j,keyobj); + server.stat_expiredkeys++; decrRefCount(keyobj); } } From f3357792408fd28002847586a27f9043e08a4e2c Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 16 Sep 2010 11:59:53 +0200 Subject: [PATCH 120/139] Static buffer in client struct has a constant size --- src/networking.c | 8 ++------ src/redis.h | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/networking.c b/src/networking.c index 55b7475b..26a6a4c0 100644 --- a/src/networking.c +++ b/src/networking.c @@ -11,11 +11,7 @@ int listMatchObjects(void *a, void *b) { } redisClient *createClient(int fd) { - redisClient *c; - - /* Allocate more space to hold a static write buffer. */ - c = zmalloc(sizeof(redisClient)+REDIS_REPLY_CHUNK_BYTES); - c->buflen = REDIS_REPLY_CHUNK_BYTES; + redisClient *c = zmalloc(sizeof(redisClient)); c->bufpos = 0; anetNonBlock(NULL,fd); @@ -84,7 +80,7 @@ robj *dupLastObjectIfNeeded(list *reply) { } int _addReplyToBuffer(redisClient *c, char *s, size_t len) { - size_t available = c->buflen-c->bufpos; + size_t available = sizeof(c->buf)-c->bufpos; /* If there already are entries in the reply list, we cannot * add anything more to the static buffer. 
*/ diff --git a/src/redis.h b/src/redis.h index 1ef56288..38727ae2 100644 --- a/src/redis.h +++ b/src/redis.h @@ -313,8 +313,7 @@ typedef struct redisClient { /* Response buffer */ int bufpos; - int buflen; - char buf[]; + char buf[REDIS_REPLY_CHUNK_BYTES]; } redisClient; struct saveparam { From 4c2e506a393615477a85ee51d69c801d84c5bad2 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 16 Sep 2010 13:08:40 +0200 Subject: [PATCH 121/139] modified a bit addReply() to play better with copy on write now that we have a static buffer. Changed the name of a function from _ensureFileEvent() to _installWriteEvent(). --- src/networking.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/networking.c b/src/networking.c index aefbc2d7..867032aa 100644 --- a/src/networking.c +++ b/src/networking.c @@ -56,7 +56,7 @@ redisClient *createClient(int fd) { return c; } -int _ensureFileEvent(redisClient *c) { +int _installWriteEvent(redisClient *c) { if (c->fd <= 0) return REDIS_ERR; if (c->bufpos == 0 && listLength(c->reply) == 0 && (c->replstate == REDIS_REPL_NONE || @@ -160,21 +160,29 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { } void addReply(redisClient *c, robj *obj) { - if (_ensureFileEvent(c) != REDIS_OK) return; - if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) { - /* Returns a new object with refcount 1 */ - obj = dupStringObject(obj); + if (_installWriteEvent(c) != REDIS_OK) return; + redisAssert(!server.vm_enabled || obj->storage == REDIS_VM_MEMORY); + + /* This is an important place where we can avoid copy-on-write + * when there is a saving child running, avoiding touching the + * refcount field of the object if it's not needed. + * + * If the encoding is RAW and there is room in the static buffer + * we'll be able to send the object to the client without + * messing with its page. 
*/ + if (obj->encoding == REDIS_ENCODING_RAW) { + if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) + _addReplyObjectToList(c,obj); } else { - /* This increments the refcount. */ obj = getDecodedObject(obj); + if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) + _addReplyObjectToList(c,obj); + decrRefCount(obj); } - if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) - _addReplyObjectToList(c,obj); - decrRefCount(obj); } void addReplySds(redisClient *c, sds s) { - if (_ensureFileEvent(c) != REDIS_OK) { + if (_installWriteEvent(c) != REDIS_OK) { /* The caller expects the sds to be free'd. */ sdsfree(s); return; @@ -188,7 +196,7 @@ void addReplySds(redisClient *c, sds s) { } void addReplyString(redisClient *c, char *s, size_t len) { - if (_ensureFileEvent(c) != REDIS_OK) return; + if (_installWriteEvent(c) != REDIS_OK) return; if (_addReplyToBuffer(c,s,len) != REDIS_OK) _addReplyStringToList(c,s,len); } @@ -234,7 +242,10 @@ void addReplyStatusFormat(redisClient *c, const char *fmt, ...) { /* Adds an empty object to the reply list that will contain the multi bulk * length, which is not known when this function is called. */ void *addDeferredMultiBulkLength(redisClient *c) { - if (_ensureFileEvent(c) != REDIS_OK) return NULL; + /* Note that we install the write event here even if the object is not + * ready to be sent, since we are sure that before returning to the + * event loop setDeferredMultiBulkLength() will be called. 
*/ + if (_installWriteEvent(c) != REDIS_OK) return NULL; listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL)); return listLast(c->reply); } From 2b00385d51cb75c30b47073a74f8edd0c53b942b Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 16 Sep 2010 13:28:58 +0200 Subject: [PATCH 122/139] Added used CPU statistics in INFO output, obtained via getrusage() --- src/redis.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/redis.c b/src/redis.c index 66e088b0..caa96247 100644 --- a/src/redis.c +++ b/src/redis.c @@ -51,6 +51,7 @@ #include #include #include +#include /* Our shared "common" objects */ @@ -1147,6 +1148,10 @@ sds genRedisInfoString(void) { time_t uptime = time(NULL)-server.stat_starttime; int j; char hmem[64]; + struct rusage self_ru, c_ru; + + getrusage(RUSAGE_SELF, &self_ru); + getrusage(RUSAGE_CHILDREN, &c_ru); bytesToHuman(hmem,zmalloc_used_memory()); info = sdscatprintf(sdsempty(), @@ -1158,6 +1163,10 @@ sds genRedisInfoString(void) { "process_id:%ld\r\n" "uptime_in_seconds:%ld\r\n" "uptime_in_days:%ld\r\n" + "used_cpu_sys:%.2f\r\n" + "used_cpu_user:%.2f\r\n" + "used_cpu_sys_childrens:%.2f\r\n" + "used_cpu_user_childrens:%.2f\r\n" "connected_clients:%d\r\n" "connected_slaves:%d\r\n" "blocked_clients:%d\r\n" @@ -1185,6 +1194,10 @@ sds genRedisInfoString(void) { (long) getpid(), uptime, uptime/(3600*24), + (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000, + (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000, + (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000, + (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000, listLength(server.clients)-listLength(server.slaves), listLength(server.slaves), server.blpop_blocked_clients, From 34a719d25034d6f1140a10eb0429bdee0efa5cd9 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 17 Sep 2010 15:25:32 +0200 Subject: [PATCH 123/139] try to parse the request in a smarter way to gain speed... 
work in progress --- src/networking.c | 13 +++++++++++-- src/redis.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 867032aa..aaf7518f 100644 --- a/src/networking.c +++ b/src/networking.c @@ -28,6 +28,7 @@ redisClient *createClient(int fd) { selectDb(c,0); c->fd = fd; c->querybuf = sdsempty(); + c->newline = NULL; c->argc = 0; c->argv = NULL; c->bulklen = -1; @@ -631,6 +632,7 @@ void resetClient(redisClient *c) { freeClientArgv(c); c->bulklen = -1; c->multibulk = 0; + c->newline = NULL; } void closeTimedoutClients(void) { @@ -672,13 +674,14 @@ again: if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return; if (c->bulklen == -1) { /* Read the first line of the query */ - char *p = strchr(c->querybuf,'\n'); size_t querylen; - if (p) { + if (c->newline) { + char *p = c->newline; sds query, *argv; int argc, j; + c->newline = NULL; query = c->querybuf; c->querybuf = sdsempty(); querylen = 1+(p-(query)); @@ -765,8 +768,14 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { return; } if (nread) { + size_t oldlen = sdslen(c->querybuf); c->querybuf = sdscatlen(c->querybuf, buf, nread); c->lastinteraction = time(NULL); + /* Scan this new piece of the query for the newline. We do this + * here in order to make sure we perform this scan just one time + * per piece of buffer, leading to an O(N) scan instead of O(N*N) */ + if (c->bulklen == -1 && c->newline == NULL) + c->newline = strchr(c->querybuf+oldlen,'\n'); } else { return; } diff --git a/src/redis.h b/src/redis.h index 38727ae2..e6166f8b 100644 --- a/src/redis.h +++ b/src/redis.h @@ -286,6 +286,7 @@ typedef struct redisClient { int dictid; sds querybuf; robj **argv, **mbargv; + char *newline; /* pointing to the detected newline in querybuf */ int argc, mbargc; long bulklen; /* bulk read len. 
-1 if not in bulk read mode */ int multibulk; /* multi bulk command format active */ From a4f3f93b90c7cc688ffff665914bdadc224847fc Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 17 Sep 2010 16:05:01 +0200 Subject: [PATCH 124/139] new parsing code bugfixing --- src/networking.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/networking.c b/src/networking.c index aaf7518f..632fd047 100644 --- a/src/networking.c +++ b/src/networking.c @@ -664,6 +664,8 @@ void closeTimedoutClients(void) { } void processInputBuffer(redisClient *c) { + int seeknewline = 0; + again: /* Before to process the input buffer, make sure the client is not * waitig for a blocking operation such as BLPOP. Note that the first @@ -672,6 +674,9 @@ again: * in the input buffer the client may be blocked, and the "goto again" * will try to reiterate. The following line will make it return asap. */ if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return; + + if (seeknewline && c->bulklen == -1) c->newline = strchr(c->querybuf,'\n'); + seeknewline = 1; if (c->bulklen == -1) { /* Read the first line of the query */ size_t querylen; From 5ca2f0c49894878be47161f667ae0daf70bb6fd3 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Sep 2010 16:09:30 +0200 Subject: [PATCH 125/139] preventive conflict resolution to merge pietern/zset-mem --- src/t_zset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/t_zset.c b/src/t_zset.c index d944e923..e3eb8325 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -355,7 +355,8 @@ void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, i *score = scoreval; } if (isnan(*score)) { - addReplyError(c,"resulting score is not a number (NaN)"); + addReplySds(c, + sdsnew("-ERR resulting score is not a number (NaN)\r\n")); zfree(score); /* Note that we don't need to check if the zset may be empty and * should be removed here, as we can only obtain Nan as score if From beb7756dcbb44099352abcb3368fcd3d23b55782 Mon Sep 17 
00:00:00 2001 From: antirez Date: Wed, 22 Sep 2010 16:10:13 +0200 Subject: [PATCH 126/139] error generation format reverted to the new style after merge --- src/t_zset.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/t_zset.c b/src/t_zset.c index eeb8dab3..93ade5aa 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -327,8 +327,7 @@ void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double score, int score += *(double*)dictGetEntryVal(de); if (isnan(score)) { - addReplySds(c, - sdsnew("-ERR resulting score is not a number (NaN)\r\n")); + addReplyError(c,"resulting score is not a number (NaN)"); /* Note that we don't need to check if the zset may be empty and * should be removed here, as we can only obtain Nan as score if * there was already an element in the sorted set. */ From 50a9fad5d50488592447dc599a9ef6a184088ee3 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Sep 2010 17:49:04 +0200 Subject: [PATCH 127/139] two leaks fixed --- src/t_zset.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/t_zset.c b/src/t_zset.c index 93ade5aa..114c95d6 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -625,25 +625,23 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { * from small to large, all src[i > 0].dict are non-empty too */ di = dictGetIterator(src[0].dict); while((de = dictNext(di)) != NULL) { - double *score = zmalloc(sizeof(double)), value; - *score = src[0].weight * zunionInterDictValue(de); + double score, value; + score = src[0].weight * zunionInterDictValue(de); for (j = 1; j < setnum; j++) { dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de)); if (other) { value = src[j].weight * zunionInterDictValue(other); - zunionInterAggregate(score, value, aggregate); + zunionInterAggregate(&score, value, aggregate); } else { break; } } - /* skip entry when not present in every source dict */ - if (j != setnum) { - zfree(score); - } else { + /* accept entry 
only when present in every source dict */ + if (j == setnum) { robj *o = dictGetEntryKey(de); - znode = zslInsert(dstzset->zsl,*score,o); + znode = zslInsert(dstzset->zsl,score,o); incrRefCount(o); /* added to skiplist */ dictAdd(dstzset->dict,o,&znode->score); incrRefCount(o); /* added to dictionary */ @@ -657,11 +655,12 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { di = dictGetIterator(src[i].dict); while((de = dictNext(di)) != NULL) { - /* skip key when already processed */ - if (dictFind(dstzset->dict,dictGetEntryKey(de)) != NULL) continue; + double score, value; - double *score = zmalloc(sizeof(double)), value; - *score = src[i].weight * zunionInterDictValue(de); + /* skip key when already processed */ + if (dictFind(dstzset->dict,dictGetEntryKey(de)) != NULL) + continue; + score = src[i].weight * zunionInterDictValue(de); /* because the zsets are sorted by size, its only possible * for sets at larger indices to hold this entry */ @@ -669,12 +668,12 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) { dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de)); if (other) { value = src[j].weight * zunionInterDictValue(other); - zunionInterAggregate(score, value, aggregate); + zunionInterAggregate(&score, value, aggregate); } } robj *o = dictGetEntryKey(de); - znode = zslInsert(dstzset->zsl,*score,o); + znode = zslInsert(dstzset->zsl,score,o); incrRefCount(o); /* added to skiplist */ dictAdd(dstzset->dict,o,&znode->score); incrRefCount(o); /* added to dictionary */ From 56e52b69feebb11931cbe8162ce1749909b7ff30 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 22 Sep 2010 18:07:52 +0200 Subject: [PATCH 128/139] Update rdb.c to properly work with new memory strategy for sorted sets --- src/rdb.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index c15fc6f2..a401a5b9 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -730,13 +730,14 @@ robj *rdbLoadObject(int type, 
FILE *fp) { /* Load every single element of the list/set */ while(zsetlen--) { robj *ele; - double *score = zmalloc(sizeof(double)); + double score; + zskiplistNode *znode; if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL; ele = tryObjectEncoding(ele); - if (rdbLoadDoubleValue(fp,score) == -1) return NULL; - dictAdd(zs->dict,ele,score); - zslInsert(zs->zsl,*score,ele); + if (rdbLoadDoubleValue(fp,&score) == -1) return NULL; + znode = zslInsert(zs->zsl,score,ele); + dictAdd(zs->dict,ele,&znode->score); incrRefCount(ele); /* added to skiplist */ } } else if (type == REDIS_HASH) { From 136cf53f22539b33396247d356e7e9d077068ccf Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Sep 2010 16:05:17 +0200 Subject: [PATCH 129/139] minimal C test framework + a first example sds.c tests --- src/sds.c | 30 ++++++++++++++++++++++++---- src/testhelp.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 src/testhelp.h diff --git a/src/sds.c b/src/sds.c index 2f3ffedc..cfe94af8 100644 --- a/src/sds.c +++ b/src/sds.c @@ -223,13 +223,16 @@ sds sdsrange(sds s, int start, int end) { } newlen = (start > end) ? 0 : (end-start)+1; if (newlen != 0) { - if (start >= (signed)len) start = len-1; - if (end >= (signed)len) end = len-1; - newlen = (start > end) ? 0 : (end-start)+1; + if (start >= (signed)len) { + newlen = 0; + } else if (end >= (signed)len) { + end = len-1; + newlen = (start > end) ? 
0 : (end-start)+1; + } } else { start = 0; } - if (start != 0) memmove(sh->buf, sh->buf+start, newlen); + if (start && newlen) memmove(sh->buf, sh->buf+start, newlen); sh->buf[newlen] = 0; sh->free = sh->free+(sh->len-newlen); sh->len = newlen; @@ -478,3 +481,22 @@ err: if (current) sdsfree(current); return NULL; } + +#ifdef SDS_TEST_MAIN +#include +#include "testhelp.h" + +int main(void) { + { + sds x = sdsnew("foo"); + + /* SDS creation and length */ + test_cond("Can create a string and obtain the length", + sdslen(x) == 3 && memcmp(x,"foo",3) == 0) + + /* Nul term checking */ + test_cond("The string contains the nul term", x[3] == '\0') + } + test_report() +} +#endif diff --git a/src/testhelp.h b/src/testhelp.h new file mode 100644 index 00000000..d699f2ae --- /dev/null +++ b/src/testhelp.h @@ -0,0 +1,54 @@ +/* This is a really minimal testing framework for C. + * + * Example: + * + * test_cond("Check if 1 == 1", 1==1) + * test_cond("Check if 5 > 10", 5 > 10) + * test_report() + * + * Copyright (c) 2010, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TESTHELP_H +#define __TESTHELP_H + +int __failed_tests = 0; +int __test_num = 0; +#define test_cond(descr,_c) do { \ + __test_num++; printf("%d - %s: ", __test_num, descr); \ + if(_c) printf("PASSED\n"); else {printf("FAILED\n"); __failed_tests++;} \ +} while(0); +#define test_report() do { \ + printf("%d tests, %d passed, %d failed\n", __test_num, \ + __test_num-__failed_tests, __failed_tests); \ + if (__failed_tests) { \ + printf("=== WARNING === We have failed tests here...\n"); \ + } \ +} while(0); + +#endif From 963238f713f02e538cf0f5851f3337173116ea39 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Sep 2010 16:39:02 +0200 Subject: [PATCH 130/139] more tests for sds.c --- src/sds.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/src/sds.c b/src/sds.c index cfe94af8..2d063c4a 100644 --- a/src/sds.c +++ b/src/sds.c @@ -488,14 +488,85 @@ err: int main(void) { { - sds x = sdsnew("foo"); + sds x = sdsnew("foo"), y; - /* SDS creation and length */ - test_cond("Can create a string and obtain the length", - sdslen(x) == 3 && memcmp(x,"foo",3) == 0) + test_cond("Create a string and 
obtain the length", + sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0) - /* Nul term checking */ - test_cond("The string contains the nul term", x[3] == '\0') + sdsfree(x); + x = sdsnewlen("foo",2); + test_cond("Create a string with specified length", + sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0) + + x = sdscat(x,"bar"); + test_cond("Strings concatenation", + sdslen(x) == 5 && memcmp(x,"fobar\0",6) == 0); + + x = sdscpy(x,"a"); + test_cond("sdscpy() against an originally longer string", + sdslen(x) == 1 && memcmp(x,"a\0",2) == 0) + + x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk"); + test_cond("sdscpy() against an originally shorter string", + sdslen(x) == 33 && + memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0) + + sdsfree(x); + x = sdscatprintf(sdsempty(),"%d",123); + test_cond("sdscatprintf() seems working in the base case", + sdslen(x) == 3 && memcmp(x,"123\0",4) ==0) + + sdsfree(x); + x = sdstrim(sdsnew("xxciaoyyy"),"xy"); + test_cond("sdstrim() correctly trims characters", + sdslen(x) == 4 && memcmp(x,"ciao\0",5) == 0) + + y = sdsrange(sdsdup(x),1,1); + test_cond("sdsrange(...,1,1)", + sdslen(y) == 1 && memcmp(y,"i\0",2) == 0) + + sdsfree(y); + y = sdsrange(sdsdup(x),1,-1); + test_cond("sdsrange(...,1,-1)", + sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0) + + sdsfree(y); + y = sdsrange(sdsdup(x),-2,-1); + test_cond("sdsrange(...,-2,-1)", + sdslen(y) == 2 && memcmp(y,"ao\0",3) == 0) + + sdsfree(y); + y = sdsrange(sdsdup(x),2,1); + test_cond("sdsrange(...,2,1)", + sdslen(y) == 0 && memcmp(y,"\0",1) == 0) + + sdsfree(y); + y = sdsrange(sdsdup(x),1,100); + test_cond("sdsrange(...,1,100)", + sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0) + + sdsfree(y); + y = sdsrange(sdsdup(x),100,100); + test_cond("sdsrange(...,100,100)", + sdslen(y) == 0 && memcmp(y,"\0",1) == 0) + + sdsfree(y); + sdsfree(x); + x = sdsnew("foo"); + y = sdsnew("foa"); + test_cond("sdscmp(foo,foa)", sdscmp(x,y) > 0) + + sdsfree(y); + sdsfree(x); + x = sdsnew("bar"); + y = sdsnew("bar"); + 
test_cond("sdscmp(bar,bar)", sdscmp(x,y) == 0) + + sdsfree(y); + sdsfree(x); + x = sdsnew("aar"); + y = sdsnew("bar"); + test_cond("sdscmp(bar,bar)", sdscmp(x,y) < 0) } test_report() } From 30d31cc8bb416f67183a218f1511ef517eb9ae3b Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Sep 2010 18:24:47 +0200 Subject: [PATCH 131/139] Contributing file added --- CONTRIBUTING | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 CONTRIBUTING diff --git a/CONTRIBUTING b/CONTRIBUTING new file mode 100644 index 00000000..644b5991 --- /dev/null +++ b/CONTRIBUTING @@ -0,0 +1,13 @@ +1. Enter irc.freenode.org #redis and start talking with 'antirez' and/or 'pietern' to check if there is interest for such a feature and to understand the probability of it being merged. We'll try hard to keep Redis simple... so you'll likely encounter a high resistance. + +2. Drop a message to the Redis Google Group with a proposal of semantics/API. + +3. If steps 1 and 2 are ok, use the following procedure to submit a patch: + + a. Fork Redis on github + b. Create a topic branch (git checkout -b my_branch) + c. Push to your branch (git push origin my_branch) + d. Create an issue in the Redis google code site with a link to your patch + e. Done :) + +Thanks!
From b0d605c1d6bbf5746cc957946138108b928c88a1 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 23 Sep 2010 22:04:19 +0200 Subject: [PATCH 132/139] Add regression test and fix for >255 byte string entries --- src/ziplist.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/ziplist.c b/src/ziplist.c index 5254423d..4f44bd58 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -144,7 +144,7 @@ static unsigned int zipDecodeLength(unsigned char *p, unsigned int *lensize) { if (lensize) *lensize = 1; break; case ZIP_STR_14B: - len = ((p[0] & 0x3f) << 6) | p[1]; + len = ((p[0] & 0x3f) << 8) | p[1]; if (lensize) *lensize = 2; break; case ZIP_STR_32B: @@ -1125,6 +1125,25 @@ int main(int argc, char **argv) { ziplistRepr(zl); } + printf("Regression test for >255 byte strings:\n"); + { + char v1[257],v2[257]; + memset(v1,'x',256); + memset(v2,'y',256); + zl = ziplistNew(); + zl = ziplistPush(zl,(unsigned char*)v1,strlen(v1),ZIPLIST_TAIL); + zl = ziplistPush(zl,(unsigned char*)v2,strlen(v2),ZIPLIST_TAIL); + + /* Pop values again and compare their value. 
*/ + p = ziplistIndex(zl,0); + assert(ziplistGet(p,&entry,&elen,&value)); + assert(strncmp(v1,entry,elen) == 0); + p = ziplistIndex(zl,1); + assert(ziplistGet(p,&entry,&elen,&value)); + assert(strncmp(v2,entry,elen) == 0); + printf("SUCCESS\n\n"); + } + printf("Create long list and check indices:\n"); { zl = ziplistNew(); From 1a06bf93c4de5016c746eab0d9d0255a458693b7 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 24 Sep 2010 10:30:15 +0200 Subject: [PATCH 133/139] ziplist implementation fuzzy tests --- tests/unit/type/list.tcl | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl index bf188fd7..c4a3b217 100644 --- a/tests/unit/type/list.tcl +++ b/tests/unit/type/list.tcl @@ -603,5 +603,58 @@ start_server { assert_equal 1 [r lrem myotherlist 1 2] assert_equal 3 [r llen myotherlist] } + + tags {slow} { + test {ziplist implementation: value encoding and backlink} { + for {set j 0} {$j < 100} {incr j} { + r del l + set l {} + for {set i 0} {$i < 200} {incr i} { + randpath { + set data [string repeat x [randomInt 100000]] + } { + set data [randomInt 65536] + } { + set data [randomInt 4294967296] + } { + set data [randomInt 18446744073709551616] + } + lappend l $data + r rpush l $data + } + assert_equal [llength $l] [r llen l] + # Traverse backward + for {set i 199} {$i >= 0} {incr i -1} { + if {[lindex $l $i] ne [r lindex l $i]} { + assert_equal [lindex $l $i] [r lindex l $i] + } + } + } + } + + test {ziplist implementation: encoding stress testing} { + for {set j 0} {$j < 200} {incr j} { + r del l + set l {} + set len [randomInt 400] + for {set i 0} {$i < $len} {incr i} { + set rv [randomValue] + randpath { + lappend l $rv + r rpush l $rv + } { + set l [concat [list $rv] $l] + r lpush l $rv + } + } + assert_equal [llength $l] [r llen l] + for {set i 0} {$i < 200} {incr i} { + if {[lindex $l $i] ne [r lindex l $i]} { + assert_equal [lindex $l $i] [r lindex l $i] + } + } + } + } 
+ } } } From ef27ba988b10a632057b04ee3abb287cbe87322d Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 24 Sep 2010 10:37:00 +0200 Subject: [PATCH 134/139] explicit regression test for a ziplist bug added --- tests/unit/type/list.tcl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl index c4a3b217..e609865c 100644 --- a/tests/unit/type/list.tcl +++ b/tests/unit/type/list.tcl @@ -604,6 +604,15 @@ start_server { assert_equal 3 [r llen myotherlist] } + test {Explicit regression for a list bug} { + set mylist {49376042582 {BkG2o\pIC]4YYJa9cJ4GWZalG[4tin;1D2whSkCOW`mX;SFXGyS8sedcff3fQI^tgPCC@^Nu1J6o]meM@Lko]t_jRyotK?tH[\EvWqS]b`o2OCtjg:?nUTwdjpcUm]y:pg5q24q7LlCOwQE^}} + r del l + r rpush l [lindex $mylist 0] + r rpush l [lindex $mylist 1] + assert_equal [r lindex l 0] [lindex $mylist 0] + assert_equal [r lindex l 1] [lindex $mylist 1] + } + tags {slow} { test {ziplist implementation: value encoding and backlink} { for {set j 0} {$j < 100} {incr j} { From 4610b0332c199563e58de005eb1b10ed54e1ec9c Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 24 Sep 2010 11:15:06 +0200 Subject: [PATCH 135/139] intset stress testing added, ziplist stress testing relocated in a more appropriate place --- tests/unit/type/list.tcl | 109 +++++++++++++++++++++------------------ tests/unit/type/set.tcl | 36 +++++++++++++ 2 files changed, 95 insertions(+), 50 deletions(-) diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl index e609865c..4c131fc3 100644 --- a/tests/unit/type/list.tcl +++ b/tests/unit/type/list.tcl @@ -604,63 +604,72 @@ start_server { assert_equal 3 [r llen myotherlist] } - test {Explicit regression for a list bug} { - set mylist {49376042582 {BkG2o\pIC]4YYJa9cJ4GWZalG[4tin;1D2whSkCOW`mX;SFXGyS8sedcff3fQI^tgPCC@^Nu1J6o]meM@Lko]t_jRyotK?tH[\EvWqS]b`o2OCtjg:?nUTwdjpcUm]y:pg5q24q7LlCOwQE^}} - r del l - r rpush l [lindex $mylist 0] - r rpush l [lindex $mylist 1] - assert_equal [r lindex l 0] [lindex $mylist 
0] - assert_equal [r lindex l 1] [lindex $mylist 1] - } + } +} - tags {slow} { - test {ziplist implementation: value encoding and backlink} { - for {set j 0} {$j < 100} {incr j} { - r del l - set l {} - for {set i 0} {$i < 200} {incr i} { - randpath { - set data [string repeat x [randomInt 100000]] - } { - set data [randomInt 65536] - } { - set data [randomInt 4294967296] - } { - set data [randomInt 18446744073709551616] - } - lappend l $data - r rpush l $data +start_server { + tags {list ziplist} + overrides { + "list-max-ziplist-value" 200000 + "list-max-ziplist-entries" 256 + } +} { + test {Explicit regression for a list bug} { + set mylist {49376042582 {BkG2o\pIC]4YYJa9cJ4GWZalG[4tin;1D2whSkCOW`mX;SFXGyS8sedcff3fQI^tgPCC@^Nu1J6o]meM@Lko]t_jRyotK?tH[\EvWqS]b`o2OCtjg:?nUTwdjpcUm]y:pg5q24q7LlCOwQE^}} + r del l + r rpush l [lindex $mylist 0] + r rpush l [lindex $mylist 1] + assert_equal [r lindex l 0] [lindex $mylist 0] + assert_equal [r lindex l 1] [lindex $mylist 1] + } + + tags {slow} { + test {ziplist implementation: value encoding and backlink} { + for {set j 0} {$j < 100} {incr j} { + r del l + set l {} + for {set i 0} {$i < 200} {incr i} { + randpath { + set data [string repeat x [randomInt 100000]] + } { + set data [randomInt 65536] + } { + set data [randomInt 4294967296] + } { + set data [randomInt 18446744073709551616] } - assert_equal [llength $l] [r llen l] - # Traverse backward - for {set i 199} {$i >= 0} {incr i -1} { - if {[lindex $l $i] ne [r lindex l $i]} { - assert_equal [lindex $l $i] [r lindex l $i] - } + lappend l $data + r rpush l $data + } + assert_equal [llength $l] [r llen l] + # Traverse backward + for {set i 199} {$i >= 0} {incr i -1} { + if {[lindex $l $i] ne [r lindex l $i]} { + assert_equal [lindex $l $i] [r lindex l $i] } } } + } - test {ziplist implementation: encoding stress testing} { - for {set j 0} {$j < 200} {incr j} { - r del l - set l {} - set len [randomInt 400] - for {set i 0} {$i < $len} {incr i} { - set rv [randomValue] - 
randpath { - lappend l $rv - r rpush l $rv - } { - set l [concat [list $rv] $l] - r lpush l $rv - } + test {ziplist implementation: encoding stress testing} { + for {set j 0} {$j < 200} {incr j} { + r del l + set l {} + set len [randomInt 400] + for {set i 0} {$i < $len} {incr i} { + set rv [randomValue] + randpath { + lappend l $rv + r rpush l $rv + } { + set l [concat [list $rv] $l] + r lpush l $rv } - assert_equal [llength $l] [r llen l] - for {set i 0} {$i < 200} {incr i} { - if {[lindex $l $i] ne [r lindex l $i]} { - assert_equal [lindex $l $i] [r lindex l $i] - } + } + assert_equal [llength $l] [r llen l] + for {set i 0} {$i < 200} {incr i} { + if {[lindex $l $i] ne [r lindex l $i]} { + assert_equal [lindex $l $i] [r lindex l $i] } } } diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index 0f9f6abe..5608a648 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -295,4 +295,40 @@ start_server { r set x 10 assert_error "ERR*wrong kind*" {r smove myset2 x foo} } + + tags {slow} { + test {intsets implementation stress testing} { + for {set j 0} {$j < 20} {incr j} { + unset -nocomplain s + array set s {} + r del s + set len [randomInt 1024] + for {set i 0} {$i < $len} {incr i} { + randpath { + set data [randomInt 65536] + } { + set data [randomInt 4294967296] + } { + set data [randomInt 18446744073709551616] + } + set s($data) {} + r sadd s $data + } + assert_equal [lsort [r smembers s]] [lsort [array names s]] + set len [array size s] + for {set i 0} {$i < $len} {incr i} { + set e [r spop s] + if {![info exists s($e)]} { + puts "Can't find '$e' on local array" + puts "Local array: [lsort [array names s]]" + puts "Remote array: [lsort [r smembers s]]" + error "exception" + } + array unset s $e + } + assert_equal [r scard s] 0 + assert_equal [array size s] 0 + } + } + } } From fdc0bde9359d12dd53b686a6db9c66cf0001670f Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 7 Oct 2010 12:49:14 +0200 Subject: [PATCH 136/139] minor typo fixed, 
reported by Thomas Bassetto --- src/redis-cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 0e6edbe7..5071604b 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -364,7 +364,7 @@ static int parseOptions(int argc, char **argv) { "automatically used as last argument.\n" ); } else if (!strcmp(argv[i],"-v")) { - printf("redis-cli shipped with Redis verison %s\n", REDIS_VERSION); + printf("redis-cli shipped with Redis version %s\n", REDIS_VERSION); exit(0); } else { break; From 144a5e72f2adc9d151438569f680533e41778504 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 7 Oct 2010 16:21:35 +0200 Subject: [PATCH 137/139] fixed an alignment problem with time_t is 32 bit, long is 64 bit, and arch is sparc or any other where unaligned accesses will result to sigbus --- src/db.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index 470310a3..44507847 100644 --- a/src/db.c +++ b/src/db.c @@ -478,7 +478,7 @@ int expireIfNeeded(redisDb *db, robj *key) { void expireGenericCommand(redisClient *c, robj *key, robj *param, long offset) { dictEntry *de; - time_t seconds; + long seconds; if (getLongFromObjectOrReply(c, param, &seconds, NULL) != REDIS_OK) return; From 1dd10ca233a533928ec2ff83f675499e4e7c1ff1 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 11 Oct 2010 13:05:09 +0200 Subject: [PATCH 138/139] maxmemory fixed, we now try to release memory just before we check for the memory limit. Before fixing there was code between the attempt to free memory and the check for memory limits, and this code could result into allocations going again after the memory limit. 
--- src/redis.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/redis.c b/src/redis.c index 7b1b3f4f..27a855d9 100644 --- a/src/redis.c +++ b/src/redis.c @@ -889,9 +889,6 @@ void call(redisClient *c, struct redisCommand *cmd) { int processCommand(redisClient *c) { struct redisCommand *cmd; - /* Free some memory if needed (maxmemory setting) */ - if (server.maxmemory) freeMemoryIfNeeded(); - /* Handle the multi bulk command type. This is an alternative protocol * supported by Redis in order to receive commands that are composed of * multiple binary-safe "bulk" arguments. The latency of processing is @@ -1029,7 +1026,12 @@ int processCommand(redisClient *c) { return 1; } - /* Handle the maxmemory directive */ + /* Handle the maxmemory directive. + * + * First we try to free some memory if possible (if there are volatile + * keys in the dataset). If there are not the only thing we can do + * is returning an error. */ + if (server.maxmemory) freeMemoryIfNeeded(); if (server.maxmemory && (cmd->flags & REDIS_CMD_DENYOOM) && zmalloc_used_memory() > server.maxmemory) { From b4f2e412d087bae0a523fe6ea40fcad30fe74b5b Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 11 Oct 2010 16:46:21 +0200 Subject: [PATCH 139/139] free memory if the maxmemory parameter is reduced via CONFIG SET --- src/config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/config.c b/src/config.c index 8a5ad6c2..1bd678c7 100644 --- a/src/config.c +++ b/src/config.c @@ -241,6 +241,7 @@ void configSetCommand(redisClient *c) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; server.maxmemory = ll; + if (server.maxmemory) freeMemoryIfNeeded(); } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0 || ll > LONG_MAX) goto badfmt;