mirror of
https://github.com/fluencelabs/redis
synced 2025-06-01 19:41:20 +00:00
Transparent LZF compression initial implementation.
This commit shapes the main ideas for the implementation but doesn't fix all the command implementations, nor does it handle loading of LZF compressed objects in a way that preserves the compression.
This commit is contained in:
parent
2a7da8a736
commit
f7501fb1d3
@ -261,6 +261,7 @@ void getbitCommand(redisClient *c) {
|
||||
|
||||
byte = bitoffset >> 3;
|
||||
bit = 7 - (bitoffset & 0x7);
|
||||
if (lzfEncodedObject(o)) o = dbUnshareStringValue(c->db,c->argv[1],o);
|
||||
if (sdsEncodedObject(o)) {
|
||||
if (byte < sdslen(o->ptr))
|
||||
bitval = ((uint8_t*)o->ptr)[byte] & (1 << bit);
|
||||
|
@ -308,8 +308,10 @@ void debugCommand(redisClient *c) {
|
||||
val = dictGetVal(de);
|
||||
key = dictGetKey(de);
|
||||
|
||||
if (val->type != REDIS_STRING || !sdsEncodedObject(val)) {
|
||||
addReplyError(c,"Not an sds encoded string.");
|
||||
if (val->type != REDIS_STRING ||
|
||||
(!sdsEncodedObject(val) && val->encoding != REDIS_ENCODING_LZF))
|
||||
{
|
||||
addReplyError(c,"Not an sds/lzf encoded string.");
|
||||
} else {
|
||||
addReplyStatusFormat(c,
|
||||
"key_sds_len:%lld, key_sds_avail:%lld, "
|
||||
|
@ -319,6 +319,11 @@ void addReply(redisClient *c, robj *obj) {
|
||||
if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
|
||||
_addReplyObjectToList(c,obj);
|
||||
decrRefCount(obj);
|
||||
} else if (obj->encoding == REDIS_ENCODING_LZF) {
|
||||
obj = getDecodedObject(obj);
|
||||
if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
|
||||
_addReplyObjectToList(c,obj);
|
||||
decrRefCount(obj);
|
||||
} else {
|
||||
redisPanic("Wrong obj->encoding in addReply()");
|
||||
}
|
||||
@ -488,7 +493,7 @@ void addReplyBulkLen(redisClient *c, robj *obj) {
|
||||
|
||||
if (sdsEncodedObject(obj)) {
|
||||
len = sdslen(obj->ptr);
|
||||
} else {
|
||||
} else if (obj->encoding == REDIS_ENCODING_INT) {
|
||||
long n = (long)obj->ptr;
|
||||
|
||||
/* Compute how many bytes will take this integer as a radix 10 string */
|
||||
@ -500,6 +505,9 @@ void addReplyBulkLen(redisClient *c, robj *obj) {
|
||||
while((n = n/10) != 0) {
|
||||
len++;
|
||||
}
|
||||
} else {
|
||||
/* LZF and others not handled explicitly. */
|
||||
len = stringObjectLen(obj);
|
||||
}
|
||||
|
||||
if (len < REDIS_SHARED_BULKHDR_LEN)
|
||||
|
56
src/object.c
56
src/object.c
@ -29,6 +29,7 @@
|
||||
*/
|
||||
|
||||
#include "redis.h"
|
||||
#include "lzf.h" /* LZF compression library */
|
||||
#include <math.h>
|
||||
#include <ctype.h>
|
||||
|
||||
@ -210,7 +211,8 @@ robj *createZsetZiplistObject(void) {
|
||||
}
|
||||
|
||||
void freeStringObject(robj *o) {
|
||||
if (o->encoding == REDIS_ENCODING_RAW) {
|
||||
if (o->encoding == REDIS_ENCODING_RAW ||
|
||||
o->encoding == REDIS_ENCODING_LZF) {
|
||||
sdsfree(o->ptr);
|
||||
}
|
||||
}
|
||||
@ -335,7 +337,9 @@ int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to encode a string object in order to save space */
|
||||
/* Try to encode a string object in order to save space. */
|
||||
#define REDIS_ENCODING_LZF_MAX_SIZE (1024*64)
|
||||
#define REDIS_ENCODING_LZF_MAX_COMPR_SIZE (1024*32)
|
||||
robj *tryObjectEncoding(robj *o) {
|
||||
long value;
|
||||
sds s = o->ptr;
|
||||
@ -394,6 +398,35 @@ robj *tryObjectEncoding(robj *o) {
|
||||
return emb;
|
||||
}
|
||||
|
||||
/* Try LZF compression for objects up to REDIS_ENCODING_LZF_MAX_SIZE
|
||||
* and greater than REDIS_ENCODING_EMBSTR_SIZE_LIMIT.
|
||||
*
|
||||
* TODO: add fast compressibility test using LZF against a few
|
||||
* characters and don't go forward if this test does not pass. */
|
||||
if (len <= REDIS_ENCODING_LZF_MAX_SIZE) {
|
||||
/* Allocate four more bytes in our buffer since we need to store
|
||||
* the size of the compressed string as header. */
|
||||
unsigned char compr[4+REDIS_ENCODING_LZF_MAX_COMPR_SIZE];
|
||||
size_t comprlen, outlen;
|
||||
|
||||
/* We want to save at least 25% of memory for this to make sense. */
|
||||
outlen = len-4-(len/4);
|
||||
if (outlen > REDIS_ENCODING_LZF_MAX_SIZE)
|
||||
outlen = REDIS_ENCODING_LZF_MAX_SIZE;
|
||||
comprlen = lzf_compress(s,len,compr+4,outlen);
|
||||
if (comprlen != 0) {
|
||||
/* Object successfully compressed within the required space. */
|
||||
compr[0] = len & 0xff;
|
||||
compr[1] = (len >> 8) & 0xff;
|
||||
compr[2] = (len >> 16) & 0xff;
|
||||
compr[3] = (len >> 24) & 0xff;
|
||||
if (o->encoding == REDIS_ENCODING_RAW) sdsfree(o->ptr);
|
||||
o->encoding = REDIS_ENCODING_LZF;
|
||||
o->ptr = sdsnewlen(compr,comprlen+4);
|
||||
return o;
|
||||
}
|
||||
}
|
||||
|
||||
/* We can't encode the object...
|
||||
*
|
||||
* Do the last try, and at least optimize the SDS string inside
|
||||
@ -428,6 +461,14 @@ robj *getDecodedObject(robj *o) {
|
||||
ll2string(buf,32,(long)o->ptr);
|
||||
dec = createStringObject(buf,strlen(buf));
|
||||
return dec;
|
||||
} else if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_LZF) {
|
||||
int origlen = stringObjectLen(o);
|
||||
sds orig = sdsnewlen(NULL,origlen);
|
||||
unsigned char *p = o->ptr;
|
||||
|
||||
if (lzf_decompress(p+4,sdslen(o->ptr)-4,orig,origlen) == 0)
|
||||
redisPanic("LZF error during object decoding.");
|
||||
return createObject(REDIS_STRING,orig);
|
||||
} else {
|
||||
redisPanic("Unknown encoding type");
|
||||
}
|
||||
@ -501,13 +542,21 @@ int equalStringObjects(robj *a, robj *b) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns the original (uncompressed) size of an LZF encoded object.
|
||||
* Only called by stringObjectLen() that should be the main interface. */
|
||||
size_t stringObjectUncompressedLen(robj *o) {
|
||||
unsigned char *p = o->ptr;
|
||||
return p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24);
|
||||
}
|
||||
|
||||
/* Return the length of the string held by the string object 'o', as it
 * would be if the value was represented as a plain string:
 *
 * - sds encoded objects: the length of the sds string.
 * - LZF encoded objects: the original (uncompressed) length.
 * - any other encoding: o->ptr is treated as a long and the length of its
 *   decimal representation is returned.
 *
 * The object must be of type REDIS_STRING (asserted). */
size_t stringObjectLen(robj *o) {
    char digits[32];

    redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);

    if (sdsEncodedObject(o)) return sdslen(o->ptr);
    if (o->encoding == REDIS_ENCODING_LZF) return stringObjectUncompressedLen(o);

    /* Integer encoded: the length is the number of chars needed to print it. */
    return ll2string(digits,32,(long)o->ptr);
}
|
||||
@ -656,6 +705,7 @@ char *strEncoding(int encoding) {
|
||||
case REDIS_ENCODING_INTSET: return "intset";
|
||||
case REDIS_ENCODING_SKIPLIST: return "skiplist";
|
||||
case REDIS_ENCODING_EMBSTR: return "embstr";
|
||||
case REDIS_ENCODING_LZF: return "lzf";
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
|
62
src/rdb.c
62
src/rdb.c
@ -209,11 +209,41 @@ int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
|
||||
return rdbEncodeInteger(value,enc);
|
||||
}
|
||||
|
||||
int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
|
||||
size_t comprlen, outlen;
|
||||
/* Save an already compressed object in LZF encoding.
|
||||
*
|
||||
* On success the length of the strored object is returned, otherwise
|
||||
* 0 is returned. */
|
||||
int rdbSaveLzfStringObject(rio *rdb, unsigned char *out, size_t len, size_t comprlen) {
|
||||
unsigned char byte;
|
||||
int n, nwritten = 0;
|
||||
|
||||
/* Data compressed! Let's save it on disk */
|
||||
byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
|
||||
if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr;
|
||||
nwritten += n;
|
||||
if ((n = rdbSaveLen(rdb,comprlen)) == -1) goto writeerr;
|
||||
nwritten += n;
|
||||
if ((n = rdbSaveLen(rdb,len)) == -1) goto writeerr;
|
||||
nwritten += n;
|
||||
if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) goto writeerr;
|
||||
nwritten += n;
|
||||
return nwritten;
|
||||
|
||||
writeerr:
|
||||
zfree(out);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Try to compress the string at 's' for 'len' bytes using LZF.
|
||||
* If successful save the object with LZF encoding, otherwise
|
||||
* returns 0 if the string can't be compressed, or -1 if the
|
||||
* compressed string can't be saved.
|
||||
*
|
||||
* On success the number of bytes used is returned. */
|
||||
int rdbTrySaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
|
||||
size_t comprlen, outlen;
|
||||
void *out;
|
||||
int retval;
|
||||
|
||||
/* We require at least four bytes compression for this to be worth it */
|
||||
if (len <= 4) return 0;
|
||||
@ -224,26 +254,9 @@ int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
|
||||
zfree(out);
|
||||
return 0;
|
||||
}
|
||||
/* Data compressed! Let's save it on disk */
|
||||
byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
|
||||
if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr;
|
||||
nwritten += n;
|
||||
|
||||
if ((n = rdbSaveLen(rdb,comprlen)) == -1) goto writeerr;
|
||||
nwritten += n;
|
||||
|
||||
if ((n = rdbSaveLen(rdb,len)) == -1) goto writeerr;
|
||||
nwritten += n;
|
||||
|
||||
if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) goto writeerr;
|
||||
nwritten += n;
|
||||
|
||||
retval = rdbSaveLzfStringObject(rdb,out,len,comprlen);
|
||||
zfree(out);
|
||||
return nwritten;
|
||||
|
||||
writeerr:
|
||||
zfree(out);
|
||||
return -1;
|
||||
return retval;
|
||||
}
|
||||
|
||||
robj *rdbLoadLzfStringObject(rio *rdb) {
|
||||
@ -283,7 +296,7 @@ int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
|
||||
/* Try LZF compression - under 20 bytes it's unable to compress even
|
||||
* aaaaaaaaaaaaaaaaaa so skip it */
|
||||
if (server.rdb_compression && len > 20) {
|
||||
n = rdbSaveLzfStringObject(rdb,s,len);
|
||||
n = rdbTrySaveLzfStringObject(rdb,s,len);
|
||||
if (n == -1) return -1;
|
||||
if (n > 0) return n;
|
||||
/* Return value of 0 means data can't be compressed, save the old way */
|
||||
@ -324,6 +337,11 @@ int rdbSaveStringObject(rio *rdb, robj *obj) {
|
||||
* object is already integer encoded. */
|
||||
if (obj->encoding == REDIS_ENCODING_INT) {
|
||||
return rdbSaveLongLongAsStringObject(rdb,(long)obj->ptr);
|
||||
} else if (obj->encoding == REDIS_ENCODING_LZF) {
|
||||
/* Data is already compressed, save it with LZF encoding. */
|
||||
int len = stringObjectLen(obj);
|
||||
unsigned char *p = obj->ptr;
|
||||
return rdbSaveLzfStringObject(rdb,p+4,len,sdslen(obj->ptr)-4);
|
||||
} else {
|
||||
redisAssertWithInfo(NULL,obj,sdsEncodedObject(obj));
|
||||
return rdbSaveRawString(rdb,obj->ptr,sdslen(obj->ptr));
|
||||
|
@ -172,7 +172,7 @@
|
||||
|
||||
/* Objects encoding. Some kind of objects like Strings and Hashes can be
|
||||
* internally represented in multiple ways. The 'encoding' field of the object
|
||||
* is set to one of this fields for this object. */
|
||||
* is set to one of these values. */
|
||||
#define REDIS_ENCODING_RAW 0 /* Raw representation */
|
||||
#define REDIS_ENCODING_INT 1 /* Encoded as integer */
|
||||
#define REDIS_ENCODING_HT 2 /* Encoded as hash table */
|
||||
@ -182,6 +182,7 @@
|
||||
#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
|
||||
#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
|
||||
#define REDIS_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
|
||||
#define REDIS_ENCODING_LZF 9 /* LZF compressed string. */
|
||||
|
||||
/* Defines related to the dump file format. To store 32 bits lengths for short
|
||||
* keys requires a lot of space, so we check the most significant 2 bits of
|
||||
@ -1081,6 +1082,7 @@ int collateStringObjects(robj *a, robj *b);
|
||||
int equalStringObjects(robj *a, robj *b);
|
||||
unsigned long long estimateObjectIdleTime(robj *o);
|
||||
/* True if the object pointed by 'objptr' holds an sds string (RAW or EMBSTR
 * encoding). The argument is parenthesized so that expressions such as
 * 'p + 1' expand correctly. Note that 'objptr' is evaluated twice, so it
 * must not have side effects. */
#define sdsEncodedObject(objptr) ((objptr)->encoding == REDIS_ENCODING_RAW || (objptr)->encoding == REDIS_ENCODING_EMBSTR)
|
||||
/* True if the object pointed by 'objptr' is an LZF compressed string. The
 * argument is parenthesized so that expressions such as 'p + 1' expand
 * correctly. */
#define lzfEncodedObject(objptr) ((objptr)->encoding == REDIS_ENCODING_LZF)
|
||||
|
||||
/* Synchronous I/O with timeout */
|
||||
ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
|
||||
|
Loading…
x
Reference in New Issue
Block a user