diff --git a/src/bitops.c b/src/bitops.c index 28f77243..9ce46344 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -261,6 +261,7 @@ void getbitCommand(redisClient *c) { byte = bitoffset >> 3; bit = 7 - (bitoffset & 0x7); + if (lzfEncodedObject(o)) o = dbUnshareStringValue(c->db,c->argv[1],o); if (sdsEncodedObject(o)) { if (byte < sdslen(o->ptr)) bitval = ((uint8_t*)o->ptr)[byte] & (1 << bit); diff --git a/src/debug.c b/src/debug.c index a9c7cc5e..07418972 100644 --- a/src/debug.c +++ b/src/debug.c @@ -308,8 +308,10 @@ void debugCommand(redisClient *c) { val = dictGetVal(de); key = dictGetKey(de); - if (val->type != REDIS_STRING || !sdsEncodedObject(val)) { - addReplyError(c,"Not an sds encoded string."); + if (val->type != REDIS_STRING || + (!sdsEncodedObject(val) && val->encoding != REDIS_ENCODING_LZF)) + { + addReplyError(c,"Not an sds/lzf encoded string."); } else { addReplyStatusFormat(c, "key_sds_len:%lld, key_sds_avail:%lld, " diff --git a/src/networking.c b/src/networking.c index 9e3e4a21..f1e5235d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -319,6 +319,11 @@ void addReply(redisClient *c, robj *obj) { if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) _addReplyObjectToList(c,obj); decrRefCount(obj); + } else if (obj->encoding == REDIS_ENCODING_LZF) { + obj = getDecodedObject(obj); + if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) + _addReplyObjectToList(c,obj); + decrRefCount(obj); } else { redisPanic("Wrong obj->encoding in addReply()"); } @@ -488,7 +493,7 @@ void addReplyBulkLen(redisClient *c, robj *obj) { if (sdsEncodedObject(obj)) { len = sdslen(obj->ptr); - } else { + } else if (obj->encoding == REDIS_ENCODING_INT) { long n = (long)obj->ptr; /* Compute how many bytes will take this integer as a radix 10 string */ @@ -500,6 +505,9 @@ void addReplyBulkLen(redisClient *c, robj *obj) { while((n = n/10) != 0) { len++; } + } else { + /* LZF and others not handled explicitly. 
*/ + len = stringObjectLen(obj); } if (len < REDIS_SHARED_BULKHDR_LEN) diff --git a/src/object.c b/src/object.c index 5602b688..d2f76e48 100644 --- a/src/object.c +++ b/src/object.c @@ -29,6 +29,7 @@ */ #include "redis.h" +#include "lzf.h" /* LZF compression library */ #include #include @@ -210,7 +211,8 @@ robj *createZsetZiplistObject(void) { } void freeStringObject(robj *o) { - if (o->encoding == REDIS_ENCODING_RAW) { + if (o->encoding == REDIS_ENCODING_RAW || + o->encoding == REDIS_ENCODING_LZF) { sdsfree(o->ptr); } } @@ -335,7 +337,9 @@ int isObjectRepresentableAsLongLong(robj *o, long long *llval) { } } -/* Try to encode a string object in order to save space */ +/* Try to encode a string object in order to save space. */ +#define REDIS_ENCODING_LZF_MAX_SIZE (1024*64) +#define REDIS_ENCODING_LZF_MAX_COMPR_SIZE (1024*32) robj *tryObjectEncoding(robj *o) { long value; sds s = o->ptr; @@ -394,6 +398,35 @@ robj *tryObjectEncoding(robj *o) { return emb; } + /* Try LZF compression for objects up to REDIS_ENCODING_LZF_MAX_SIZE + * and greater than REDIS_ENCODING_EMBSTR_SIZE_LIMIT. + * + * TODO: add fast compressibility test using LZF against a few + * characters and don't go forward if this test does not pass. */ + if (len <= REDIS_ENCODING_LZF_MAX_SIZE) { + /* Allocate four more bytes in our buffer since we need to store + * the size of the original (uncompressed) string as header. */ + unsigned char compr[4+REDIS_ENCODING_LZF_MAX_COMPR_SIZE]; + size_t comprlen, outlen; + + /* We want to save at least 25% of memory, and must fit in 'compr'. */ + outlen = len-4-(len/4); + if (outlen > REDIS_ENCODING_LZF_MAX_COMPR_SIZE) + outlen = REDIS_ENCODING_LZF_MAX_COMPR_SIZE; + comprlen = lzf_compress(s,len,compr+4,outlen); + if (comprlen != 0) { + /* Object successfully compressed within the required space. 
*/ + compr[0] = len & 0xff; + compr[1] = (len >> 8) & 0xff; + compr[2] = (len >> 16) & 0xff; + compr[3] = (len >> 24) & 0xff; + if (o->encoding == REDIS_ENCODING_RAW) sdsfree(o->ptr); + o->encoding = REDIS_ENCODING_LZF; + o->ptr = sdsnewlen(compr,comprlen+4); + return o; + } + } + /* We can't encode the object... * * Do the last try, and at least optimize the SDS string inside @@ -428,6 +461,14 @@ robj *getDecodedObject(robj *o) { ll2string(buf,32,(long)o->ptr); dec = createStringObject(buf,strlen(buf)); return dec; + } else if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_LZF) { + int origlen = stringObjectLen(o); + sds orig = sdsnewlen(NULL,origlen); + unsigned char *p = o->ptr; + + if (lzf_decompress(p+4,sdslen(o->ptr)-4,orig,origlen) == 0) + redisPanic("LZF error during object decoding."); + return createObject(REDIS_STRING,orig); } else { redisPanic("Unknown encoding type"); } @@ -501,13 +542,21 @@ int equalStringObjects(robj *a, robj *b) { } } +/* Returns the original (uncompressed) size of an LZF encoded object. + * Only called by stringObjectLen() that should be the main interface. 
*/ +size_t stringObjectUncompressedLen(robj *o) { + unsigned char *p = o->ptr; + return (size_t)p[0] | ((size_t)p[1]<<8) | ((size_t)p[2]<<16) | ((size_t)p[3]<<24); +} + size_t stringObjectLen(robj *o) { redisAssertWithInfo(NULL,o,o->type == REDIS_STRING); if (sdsEncodedObject(o)) { return sdslen(o->ptr); + } else if (o->encoding == REDIS_ENCODING_LZF) { + return stringObjectUncompressedLen(o); } else { char buf[32]; - return ll2string(buf,32,(long)o->ptr); } } @@ -656,6 +705,7 @@ char *strEncoding(int encoding) { case REDIS_ENCODING_INTSET: return "intset"; case REDIS_ENCODING_SKIPLIST: return "skiplist"; case REDIS_ENCODING_EMBSTR: return "embstr"; + case REDIS_ENCODING_LZF: return "lzf"; default: return "unknown"; } } diff --git a/src/rdb.c b/src/rdb.c index eb27bfa7..680cbe60 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -209,11 +209,41 @@ int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) { return rdbEncodeInteger(value,enc); } -int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) { - size_t comprlen, outlen; +/* Save an already compressed object in LZF encoding. + * + * On success the length of the stored object is returned, otherwise + * -1 is returned. The caller keeps ownership of 'out'. */ +int rdbSaveLzfStringObject(rio *rdb, unsigned char *out, size_t len, size_t comprlen) { unsigned char byte; int n, nwritten = 0; + + /* Data compressed! Let's save it on disk */ + byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF; + if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr; + nwritten += n; + if ((n = rdbSaveLen(rdb,comprlen)) == -1) goto writeerr; + nwritten += n; + if ((n = rdbSaveLen(rdb,len)) == -1) goto writeerr; + nwritten += n; + if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) goto writeerr; + nwritten += n; + return nwritten; + +writeerr: + /* 'out' belongs to the caller: freeing it here would double free. */ + return -1; +} + +/* Try to compress the string at 's' for 'len' bytes using LZF. + * If successful save the object with LZF encoding, otherwise + * returns 0 if the string can't be compressed, or -1 if the + * compressed string can't be saved. 
+ * + * On success the number of bytes used is returned. */ +int rdbTrySaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) { + size_t comprlen, outlen; void *out; + int retval; /* We require at least four bytes compression for this to be worth it */ if (len <= 4) return 0; @@ -224,26 +254,9 @@ int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) { zfree(out); return 0; } - /* Data compressed! Let's save it on disk */ - byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF; - if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr; - nwritten += n; - - if ((n = rdbSaveLen(rdb,comprlen)) == -1) goto writeerr; - nwritten += n; - - if ((n = rdbSaveLen(rdb,len)) == -1) goto writeerr; - nwritten += n; - - if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) goto writeerr; - nwritten += n; - + retval = rdbSaveLzfStringObject(rdb,out,len,comprlen); zfree(out); - return nwritten; - -writeerr: - zfree(out); - return -1; + return retval; } robj *rdbLoadLzfStringObject(rio *rdb) { @@ -283,7 +296,7 @@ int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) { /* Try LZF compression - under 20 bytes it's unable to compress even * aaaaaaaaaaaaaaaaaa so skip it */ if (server.rdb_compression && len > 20) { - n = rdbSaveLzfStringObject(rdb,s,len); + n = rdbTrySaveLzfStringObject(rdb,s,len); if (n == -1) return -1; if (n > 0) return n; /* Return value of 0 means data can't be compressed, save the old way */ @@ -324,6 +337,11 @@ int rdbSaveStringObject(rio *rdb, robj *obj) { * object is already integer encoded. */ if (obj->encoding == REDIS_ENCODING_INT) { return rdbSaveLongLongAsStringObject(rdb,(long)obj->ptr); + } else if (obj->encoding == REDIS_ENCODING_LZF) { + /* Data is already compressed, save it with LZF encoding. 
*/ + int len = stringObjectLen(obj); + unsigned char *p = obj->ptr; + return rdbSaveLzfStringObject(rdb,p+4,len,sdslen(obj->ptr)-4); } else { redisAssertWithInfo(NULL,obj,sdsEncodedObject(obj)); return rdbSaveRawString(rdb,obj->ptr,sdslen(obj->ptr)); diff --git a/src/redis.h b/src/redis.h index e95b939e..00cfbdd2 100644 --- a/src/redis.h +++ b/src/redis.h @@ -172,7 +172,7 @@ /* Objects encoding. Some kind of objects like Strings and Hashes can be * internally represented in multiple ways. The 'encoding' field of the object - * is set to one of this fields for this object. */ + * is set to one of this values. */ #define REDIS_ENCODING_RAW 0 /* Raw representation */ #define REDIS_ENCODING_INT 1 /* Encoded as integer */ #define REDIS_ENCODING_HT 2 /* Encoded as hash table */ @@ -182,6 +182,7 @@ #define REDIS_ENCODING_INTSET 6 /* Encoded as intset */ #define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */ #define REDIS_ENCODING_EMBSTR 8 /* Embedded sds string encoding */ +#define REDIS_ENCODING_LZF 9 /* LZF compressed string. */ /* Defines related to the dump file format. To store 32 bits lengths for short * keys requires a lot of space, so we check the most significant 2 bits of @@ -1081,6 +1082,7 @@ int collateStringObjects(robj *a, robj *b); int equalStringObjects(robj *a, robj *b); unsigned long long estimateObjectIdleTime(robj *o); #define sdsEncodedObject(objptr) (objptr->encoding == REDIS_ENCODING_RAW || objptr->encoding == REDIS_ENCODING_EMBSTR) +#define lzfEncodedObject(objptr) (objptr->encoding == REDIS_ENCODING_LZF) /* Synchronous I/O with timeout */ ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);