Transparent LZF compression initial implementation.

This commit shapes the main ideas for the implementation, but it doesn't
fix all the command implementations, nor does it handle loading of LZF
compressed objects in a way that preserves the compression.
This commit is contained in:
antirez 2014-04-04 12:11:48 +02:00
parent 2a7da8a736
commit f7501fb1d3
6 changed files with 110 additions and 29 deletions

View File

@ -261,6 +261,7 @@ void getbitCommand(redisClient *c) {
byte = bitoffset >> 3;
bit = 7 - (bitoffset & 0x7);
if (lzfEncodedObject(o)) o = dbUnshareStringValue(c->db,c->argv[1],o);
if (sdsEncodedObject(o)) {
if (byte < sdslen(o->ptr))
bitval = ((uint8_t*)o->ptr)[byte] & (1 << bit);

View File

@ -308,8 +308,10 @@ void debugCommand(redisClient *c) {
val = dictGetVal(de);
key = dictGetKey(de);
if (val->type != REDIS_STRING || !sdsEncodedObject(val)) {
addReplyError(c,"Not an sds encoded string.");
if (val->type != REDIS_STRING ||
(!sdsEncodedObject(val) && val->encoding != REDIS_ENCODING_LZF))
{
addReplyError(c,"Not an sds/lzf encoded string.");
} else {
addReplyStatusFormat(c,
"key_sds_len:%lld, key_sds_avail:%lld, "

View File

@ -319,6 +319,11 @@ void addReply(redisClient *c, robj *obj) {
if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
_addReplyObjectToList(c,obj);
decrRefCount(obj);
} else if (obj->encoding == REDIS_ENCODING_LZF) {
obj = getDecodedObject(obj);
if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
_addReplyObjectToList(c,obj);
decrRefCount(obj);
} else {
redisPanic("Wrong obj->encoding in addReply()");
}
@ -488,7 +493,7 @@ void addReplyBulkLen(redisClient *c, robj *obj) {
if (sdsEncodedObject(obj)) {
len = sdslen(obj->ptr);
} else {
} else if (obj->encoding == REDIS_ENCODING_INT) {
long n = (long)obj->ptr;
/* Compute how many bytes will take this integer as a radix 10 string */
@ -500,6 +505,9 @@ void addReplyBulkLen(redisClient *c, robj *obj) {
while((n = n/10) != 0) {
len++;
}
} else {
/* LZF and others not handled explicitly. */
len = stringObjectLen(obj);
}
if (len < REDIS_SHARED_BULKHDR_LEN)

View File

@ -29,6 +29,7 @@
*/
#include "redis.h"
#include "lzf.h" /* LZF compression library */
#include <math.h>
#include <ctype.h>
@ -210,7 +211,8 @@ robj *createZsetZiplistObject(void) {
}
/* Release the payload owned by a string object.
 *
 * Both the RAW and the LZF encodings keep an sds string in o->ptr (the LZF
 * payload is built with sdsnewlen() by tryObjectEncoding()), so both are
 * released with sdsfree(). Nothing is freed here for any other encoding. */
void freeStringObject(robj *o) {
    if (o->encoding == REDIS_ENCODING_RAW ||
        o->encoding == REDIS_ENCODING_LZF) {
        sdsfree(o->ptr);
    }
}
@ -335,7 +337,9 @@ int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
}
}
/* Try to encode a string object in order to save space */
/* Try to encode a string object in order to save space. */
#define REDIS_ENCODING_LZF_MAX_SIZE (1024*64)
#define REDIS_ENCODING_LZF_MAX_COMPR_SIZE (1024*32)
robj *tryObjectEncoding(robj *o) {
long value;
sds s = o->ptr;
@ -394,6 +398,35 @@ robj *tryObjectEncoding(robj *o) {
return emb;
}
/* Try LZF compression for objects up to REDIS_ENCODING_LZF_MAX_SIZE
 * and greater than REDIS_ENCODING_EMBSTR_SIZE_LIMIT.
 *
 * TODO: add a fast compressibility test running LZF against a few
 * characters, and don't go forward if this test does not pass. */
if (len <= REDIS_ENCODING_LZF_MAX_SIZE) {
    /* Allocate four more bytes in our buffer since we need to store
     * the size of the uncompressed string as header. */
    unsigned char compr[4+REDIS_ENCODING_LZF_MAX_COMPR_SIZE];
    size_t comprlen, outlen;

    /* We want to save at least 25% of memory for this to make sense. */
    outlen = len-4-(len/4);

    /* The payload area of 'compr' is REDIS_ENCODING_LZF_MAX_COMPR_SIZE
     * bytes: never allow the compressor to write more than that, otherwise
     * large inputs that compress poorly would overflow the stack buffer. */
    if (outlen > REDIS_ENCODING_LZF_MAX_COMPR_SIZE)
        outlen = REDIS_ENCODING_LZF_MAX_COMPR_SIZE;

    comprlen = lzf_compress(s,len,compr+4,outlen);
    if (comprlen != 0) {
        /* Object successfully compressed within the required space.
         * Store the original length as a 4 bytes little endian header,
         * this is what stringObjectUncompressedLen() reads back. */
        compr[0] = len & 0xff;
        compr[1] = (len >> 8) & 0xff;
        compr[2] = (len >> 16) & 0xff;
        compr[3] = (len >> 24) & 0xff;
        if (o->encoding == REDIS_ENCODING_RAW) sdsfree(o->ptr);
        o->encoding = REDIS_ENCODING_LZF;
        o->ptr = sdsnewlen(compr,comprlen+4);
        return o;
    }
}
/* We can't encode the object...
*
* Do the last try, and at least optimize the SDS string inside
@ -428,6 +461,14 @@ robj *getDecodedObject(robj *o) {
ll2string(buf,32,(long)o->ptr);
dec = createStringObject(buf,strlen(buf));
return dec;
} else if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_LZF) {
int origlen = stringObjectLen(o);
sds orig = sdsnewlen(NULL,origlen);
unsigned char *p = o->ptr;
if (lzf_decompress(p+4,sdslen(o->ptr)-4,orig,origlen) == 0)
redisPanic("LZF error during object decoding.");
return createObject(REDIS_STRING,orig);
} else {
redisPanic("Unknown encoding type");
}
@ -501,13 +542,21 @@ int equalStringObjects(robj *a, robj *b) {
}
}
/* Returns the original (uncompressed) size of an LZF encoded object.
 * Only called by stringObjectLen() that should be the main interface.
 *
 * The size is read from the first four bytes of o->ptr, stored least
 * significant byte first. Every byte is widened to an unsigned type before
 * being shifted: shifting the promoted 'int' by 24 bits would overflow into
 * the sign bit when the high byte is >= 0x80, and the negative result would
 * then be sign extended when converted to size_t. */
size_t stringObjectUncompressedLen(robj *o) {
    const unsigned char *p = o->ptr;

    return (size_t)((unsigned long)p[0] |
                    ((unsigned long)p[1] << 8) |
                    ((unsigned long)p[2] << 16) |
                    ((unsigned long)p[3] << 24));
}
/* Return the length in bytes of the string represented by 'o', regardless
 * of how the object is encoded. The object must be of type REDIS_STRING
 * (asserted).
 *
 * - LZF encoded objects report the uncompressed length stored in their
 *   header.
 * - sds encoded objects (see sdsEncodedObject()) report the sds length.
 * - For any other encoding o->ptr is treated as a long value and the result
 *   of formatting it with ll2string() is returned. */
size_t stringObjectLen(robj *o) {
    redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);

    if (o->encoding == REDIS_ENCODING_LZF)
        return stringObjectUncompressedLen(o);

    if (!sdsEncodedObject(o)) {
        /* The value lives in the pointer itself: format it into a scratch
         * buffer just to learn how long its string form is. */
        char digits[32];
        return ll2string(digits,32,(long)o->ptr);
    }

    return sdslen(o->ptr);
}
@ -656,6 +705,7 @@ char *strEncoding(int encoding) {
case REDIS_ENCODING_INTSET: return "intset";
case REDIS_ENCODING_SKIPLIST: return "skiplist";
case REDIS_ENCODING_EMBSTR: return "embstr";
case REDIS_ENCODING_LZF: return "lzf";
default: return "unknown";
}
}

View File

@ -209,11 +209,41 @@ int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
return rdbEncodeInteger(value,enc);
}
int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
size_t comprlen, outlen;
/* Save an already compressed object in LZF encoding.
 *
 * 'out' points to 'comprlen' bytes of LZF compressed payload, 'len' is the
 * length of the original uncompressed string. What gets written is the
 * LZF encoding type byte, the compressed length, the original length and
 * finally the compressed payload.
 *
 * The 'out' buffer is owned by the caller and is never released here:
 * rdbTrySaveLzfStringObject() frees its own buffer after this call returns,
 * while rdbSaveStringObject() passes a pointer inside an sds string that
 * must not be handed to zfree() at all.
 *
 * On success the number of bytes written is returned, otherwise -1 is
 * returned. */
int rdbSaveLzfStringObject(rio *rdb, unsigned char *out, size_t len, size_t comprlen) {
    unsigned char byte;
    int n, nwritten = 0;

    /* Data compressed! Let's save it on disk */
    byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
    if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) return -1;
    nwritten += n;

    if ((n = rdbSaveLen(rdb,comprlen)) == -1) return -1;
    nwritten += n;

    if ((n = rdbSaveLen(rdb,len)) == -1) return -1;
    nwritten += n;

    if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) return -1;
    nwritten += n;

    return nwritten;
}
/* Try to compress the string at 's' for 'len' bytes using LZF.
* If successful save the object with LZF encoding, otherwise
* returns 0 if the string can't be compressed, or -1 if the
* compressed string can't be saved.
*
* On success the number of bytes used is returned. */
int rdbTrySaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
size_t comprlen, outlen;
void *out;
int retval;
/* We require at least four bytes compression for this to be worth it */
if (len <= 4) return 0;
@ -224,26 +254,9 @@ int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
zfree(out);
return 0;
}
/* Data compressed! Let's save it on disk */
byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr;
nwritten += n;
if ((n = rdbSaveLen(rdb,comprlen)) == -1) goto writeerr;
nwritten += n;
if ((n = rdbSaveLen(rdb,len)) == -1) goto writeerr;
nwritten += n;
if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) goto writeerr;
nwritten += n;
retval = rdbSaveLzfStringObject(rdb,out,len,comprlen);
zfree(out);
return nwritten;
writeerr:
zfree(out);
return -1;
return retval;
}
robj *rdbLoadLzfStringObject(rio *rdb) {
@ -283,7 +296,7 @@ int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
/* Try LZF compression - under 20 bytes it's unable to compress even
* aaaaaaaaaaaaaaaaaa so skip it */
if (server.rdb_compression && len > 20) {
n = rdbSaveLzfStringObject(rdb,s,len);
n = rdbTrySaveLzfStringObject(rdb,s,len);
if (n == -1) return -1;
if (n > 0) return n;
/* Return value of 0 means data can't be compressed, save the old way */
@ -324,6 +337,11 @@ int rdbSaveStringObject(rio *rdb, robj *obj) {
* object is already integer encoded. */
if (obj->encoding == REDIS_ENCODING_INT) {
return rdbSaveLongLongAsStringObject(rdb,(long)obj->ptr);
} else if (obj->encoding == REDIS_ENCODING_LZF) {
/* Data is already compressed, save it with LZF encoding. */
int len = stringObjectLen(obj);
unsigned char *p = obj->ptr;
return rdbSaveLzfStringObject(rdb,p+4,len,sdslen(obj->ptr)-4);
} else {
redisAssertWithInfo(NULL,obj,sdsEncodedObject(obj));
return rdbSaveRawString(rdb,obj->ptr,sdslen(obj->ptr));

View File

@ -172,7 +172,7 @@
/* Objects encoding. Some kind of objects like Strings and Hashes can be
* internally represented in multiple ways. The 'encoding' field of the object
* is set to one of this fields for this object. */
* is set to one of these values. */
#define REDIS_ENCODING_RAW 0 /* Raw representation */
#define REDIS_ENCODING_INT 1 /* Encoded as integer */
#define REDIS_ENCODING_HT 2 /* Encoded as hash table */
@ -182,6 +182,7 @@
#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
#define REDIS_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
#define REDIS_ENCODING_LZF 9 /* LZF compressed string. */
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
@ -1081,6 +1082,7 @@ int collateStringObjects(robj *a, robj *b);
int equalStringObjects(robj *a, robj *b);
unsigned long long estimateObjectIdleTime(robj *o);
/* True if the object is stored as an sds string (RAW or EMBSTR encoding).
 * The argument is parenthesized so that expressions such as '&obj' or
 * 'array+1' can be passed safely. Note that it is evaluated twice, so it
 * must not have side effects. */
#define sdsEncodedObject(objptr) ((objptr)->encoding == REDIS_ENCODING_RAW || (objptr)->encoding == REDIS_ENCODING_EMBSTR)
/* True if the object holds an LZF compressed string. */
#define lzfEncodedObject(objptr) ((objptr)->encoding == REDIS_ENCODING_LZF)
/* Synchronous I/O with timeout */
ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);