Merge branch 'unstable' into 5.0 branch

This commit is contained in:
antirez 2018-07-23 18:29:33 +02:00
commit 77a7ec7200
84 changed files with 2545 additions and 2733 deletions

View File

@ -8,7 +8,9 @@ each source file that you contribute.
# IMPORTANT: HOW TO USE REDIS GITHUB ISSUES
* Github issues SHOULD ONLY BE USED to report bugs, and for DETAILED feature
requests. Everything else belongs to the Redis Google Group.
requests. Everything else belongs to the Redis Google Group:
https://groups.google.com/forum/m/#!forum/Redis-db
PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected
bugs in the Github issues system. We'll be very happy to help you and provide
@ -30,7 +32,7 @@ each source file that you contribute.
a. Fork Redis on github ( http://help.github.com/fork-a-repo/ )
b. Create a topic branch (git checkout -b my_branch)
c. Push to your branch (git push origin my_branch)
d. Initiate a pull request on github ( http://help.github.com/send-pull-requests/ )
d. Initiate a pull request on github ( https://help.github.com/articles/creating-a-pull-request/ )
e. Done :)
For minor fixes just open a pull request on Github.

View File

@ -435,7 +435,7 @@ top comment inside `server.c`.
After the command operates in some way, it returns a reply to the client,
usually using `addReply()` or a similar function defined inside `networking.c`.
There are tons of commands implementations inside th Redis source code
There are tons of commands implementations inside the Redis source code
that can serve as examples of actual commands implementations. To write
a few toy commands can be a good exercise to familiarize with the code base.

6
deps/README.md vendored
View File

@ -22,7 +22,7 @@ just following tose steps:
1. Remove the jemalloc directory.
2. Substitute it with the new jemalloc source tree.
3. Edit the Makefile localted in the same directoy as the README you are
3. Edit the Makefile localted in the same directory as the README you are
reading, and change the --with-version in the Jemalloc configure script
options with the version you are using. This is required because otherwise
Jemalloc configuration script is broken and will not work nested in another
@ -50,7 +50,7 @@ This is never upgraded since it's part of the Redis project. If there are change
Hiredis
---
Hiredis uses the SDS string library, that must be the same version used inside Redis itself. Hiredis is also very critical for Sentinel. Historically Redis often used forked versions of hiredis in a way or the other. In order to upgrade it is adviced to take a lot of care:
Hiredis uses the SDS string library, that must be the same version used inside Redis itself. Hiredis is also very critical for Sentinel. Historically Redis often used forked versions of hiredis in a way or the other. In order to upgrade it is advised to take a lot of care:
1. Check with diff if hiredis API changed and what impact it could have in Redis.
2. Make sure thet the SDS library inside Hiredis and inside Redis are compatible.
@ -83,6 +83,6 @@ and our version:
1. Makefile is modified to allow a different compiler than GCC.
2. We have the implementation source code, and directly link to the following external libraries: `lua_cjson.o`, `lua_struct.o`, `lua_cmsgpack.o` and `lua_bit.o`.
3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order toa void direct bytecode exectuion.
3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order toa void direct bytecode execution.

View File

@ -215,4 +215,32 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero);
}
JEMALLOC_ALWAYS_INLINE int
iget_defrag_hint(tsdn_t *tsdn, void* ptr, int *bin_util, int *run_util) {
int defrag = 0;
rtree_ctx_t rtree_ctx_fallback;
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
szind_t szind;
bool is_slab;
rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &is_slab);
if (likely(is_slab)) {
/* Small allocation. */
extent_t *slab = iealloc(tsdn, ptr);
arena_t *arena = extent_arena_get(slab);
szind_t binind = extent_szind_get(slab);
bin_t *bin = &arena->bins[binind];
malloc_mutex_lock(tsdn, &bin->lock);
/* don't bother moving allocations from the slab currently used for new allocations */
if (slab != bin->slabcur) {
const bin_info_t *bin_info = &bin_infos[binind];
size_t availregs = bin_info->nregs * bin->stats.curslabs;
*bin_util = ((long long)bin->stats.curregs<<16) / availregs;
*run_util = ((long long)(bin_info->nregs - extent_nfree_get(slab))<<16) / bin_info->nregs;
defrag = 1;
}
malloc_mutex_unlock(tsdn, &bin->lock);
}
return defrag;
}
#endif /* JEMALLOC_INTERNAL_INLINES_C_H */

View File

@ -120,3 +120,7 @@
# define JEMALLOC_RESTRICT_RETURN
# define JEMALLOC_ALLOCATOR
#endif
/* This version of Jemalloc, modified for Redis, has the je_get_defrag_hint()
* function. */
#define JEMALLOC_FRAG_HINT

View File

@ -3324,3 +3324,14 @@ jemalloc_postfork_child(void) {
}
/******************************************************************************/
/* Helps the application decide if a pointer is worth re-allocating in order to reduce fragmentation.
* returns 0 if the allocation is in the currently active run,
* or when it is not causing any frag issue (large or huge bin)
* returns the bin utilization and run utilization both in fixed point 16:16.
* If the application decides to re-allocate it should use MALLOCX_TCACHE_NONE when doing so. */
JEMALLOC_EXPORT int JEMALLOC_NOTHROW
get_defrag_hint(void* ptr, int *bin_util, int *run_util) {
assert(ptr != NULL);
return iget_defrag_hint(TSDN_NULL, ptr, bin_util, run_util);
}

View File

@ -639,7 +639,7 @@ slave-priority 100
# it with the specified string.
# 4) During replication, when a slave performs a full resynchronization with
# its master, the content of the whole database is removed in order to
# load the RDB file just transfered.
# load the RDB file just transferred.
#
# In all the above cases the default is to delete objects in a blocking way,
# like if DEL was called. However you can configure each case specifically
@ -1211,6 +1211,12 @@ hz 10
# big latency spikes.
aof-rewrite-incremental-fsync yes
# When redis saves RDB file, if the following option is enabled
# the file will be fsync-ed every 32 MB of data generated. This is useful
# in order to commit the file to the disk more incrementally and avoid
# big latency spikes.
rdb-save-incremental-fsync yes
# Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good
# idea to start with the default settings and only change them after investigating
# how to improve the performances and how the keys LFU change over time, which

View File

@ -194,3 +194,31 @@ sentinel failover-timeout mymaster 180000
#
# sentinel client-reconfig-script mymaster /var/redis/reconfig.sh
# SECURITY
#
# By default SENTINEL SET will not be able to change the notification-script
# and client-reconfig-script at runtime. This avoids a trivial security issue
# where clients can set the script to anything and trigger a failover in order
# to get the program executed.
sentinel deny-scripts-reconfig yes
# REDIS COMMANDS RENAMING
#
# Sometimes the Redis server has certain commands, that are needed for Sentinel
# to work correctly, renamed to unguessable strings. This is often the case
# of CONFIG and SLAVEOF in the context of providers that provide Redis as
# a service, and don't want the customers to reconfigure the instances outside
# of the administration console.
#
# In such case it is possible to tell Sentinel to use different command names
# instead of the normal ones. For example if the master "mymaster", and the
# associated slaves, have "CONFIG" all renamed to "GUESSME", I could use:
#
# sentinel rename-command mymaster CONFIG GUESSME
#
# After such configuration is set, every time Sentinel would use CONFIG it will
# use GUESSME instead. Note that there is no actual need to respect the command
# case, so writing "config guessme" is the same in the example above.
#
# SENTINEL SET can also be used in order to perform this configuration at runtime.

View File

@ -144,7 +144,7 @@ endif
REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o
REDIS_CLI_NAME=redis-cli
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o
REDIS_BENCHMARK_NAME=redis-benchmark

View File

@ -433,7 +433,7 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
* before replying to a client. */
int invert = fe->mask & AE_BARRIER;
/* Note the "fe->mask & mask & ..." code: maybe an already
/* Note the "fe->mask & mask & ..." code: maybe an already
* processed event removed an element that fired and we still
* didn't processed, so we check if the event is still valid.
*
@ -485,7 +485,7 @@ int aeWait(int fd, int mask, long long milliseconds) {
if ((retval = poll(&pfd, 1, milliseconds))== 1) {
if (pfd.revents & POLLIN) retmask |= AE_READABLE;
if (pfd.revents & POLLOUT) retmask |= AE_WRITABLE;
if (pfd.revents & POLLERR) retmask |= AE_WRITABLE;
if (pfd.revents & POLLERR) retmask |= AE_WRITABLE;
if (pfd.revents & POLLHUP) retmask |= AE_WRITABLE;
return retmask;
} else {

View File

@ -228,7 +228,7 @@ static void killAppendOnlyChild(void) {
void stopAppendOnly(void) {
serverAssert(server.aof_state != AOF_OFF);
flushAppendOnlyFile(1);
aof_fsync(server.aof_fd);
redis_fsync(server.aof_fd);
close(server.aof_fd);
server.aof_fd = -1;
@ -261,7 +261,7 @@ int startAppendOnly(void) {
serverLog(LL_WARNING,"AOF was enabled but there is already a child process saving an RDB file on disk. An AOF background was scheduled to start when possible.");
} else {
/* If there is a pending AOF rewrite, we need to switch it off and
* start a new one: the old one cannot be reused becuase it is not
* start a new one: the old one cannot be reused because it is not
* accumulating the AOF buffer. */
if (server.aof_child_pid != -1) {
serverLog(LL_WARNING,"AOF was enabled but there is already an AOF rewriting in background. Stopping background AOF and starting a rewrite now.");
@ -476,10 +476,10 @@ void flushAppendOnlyFile(int force) {
/* Perform the fsync if needed. */
if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
/* aof_fsync is defined as fdatasync() for Linux in order to avoid
/* redis_fsync is defined as fdatasync() for Linux in order to avoid
* flushing metadata. */
latencyStartMonitor(latency);
aof_fsync(server.aof_fd); /* Let's try to get this data on the disk */
redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-fsync-always",latency);
server.aof_last_fsync = server.unixtime;
@ -645,7 +645,7 @@ struct client *createFakeClient(void) {
c->obuf_soft_limit_reached_time = 0;
c->watched_keys = listCreate();
c->peerid = NULL;
listSetFreeMethod(c->reply,decrRefCountVoid);
listSetFreeMethod(c->reply,freeClientReplyValue);
listSetDupMethod(c->reply,dupClientReplyValue);
initClientMultiState(c);
return c;
@ -683,7 +683,7 @@ int loadAppendOnlyFile(char *filename) {
exit(1);
}
/* Handle a zero-length AOF file as a special case. An emtpy AOF file
/* Handle a zero-length AOF file as a special case. An empty AOF file
* is a valid AOF because an empty server with AOF enabled will create
* a zero length file at startup, that will remain like that if no write
* operation is received. */
@ -1221,7 +1221,6 @@ int rewriteAppendOnlyFileRio(rio *aof) {
dictIterator *di = NULL;
dictEntry *de;
size_t processed = 0;
long long now = mstime();
int j;
for (j = 0; j < server.dbnum; j++) {
@ -1247,9 +1246,6 @@ int rewriteAppendOnlyFileRio(rio *aof) {
expiretime = getExpire(db,&key);
/* If this key is already expired skip it */
if (expiretime != -1 && expiretime < now) continue;
/* Save the key and associated value */
if (o->type == OBJ_STRING) {
/* Emit a SET command */
@ -1322,7 +1318,7 @@ int rewriteAppendOnlyFile(char *filename) {
rioInitWithFile(&aof,fp);
if (server.aof_rewrite_incremental_fsync)
rioSetAutoSync(&aof,AOF_AUTOSYNC_BYTES);
rioSetAutoSync(&aof,REDIS_AUTOSYNC_BYTES);
if (server.aof_use_rdb_preamble) {
int error;
@ -1690,7 +1686,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
oldfd = server.aof_fd;
server.aof_fd = newfd;
if (server.aof_fsync == AOF_FSYNC_ALWAYS)
aof_fsync(newfd);
redis_fsync(newfd);
else if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
aof_background_fsync(newfd);
server.aof_selected_db = -1; /* Make sure SELECT is re-issued */
@ -1717,7 +1713,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
"Background AOF rewrite signal handler took %lldus", ustime()-now);
} else if (!bysignal && exitcode != 0) {
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
* tirggering an error conditon. */
* tirggering an error condition. */
if (bysignal != SIGUSR1)
server.aof_lastbgrewrite_status = C_ERR;
serverLog(LL_WARNING,

View File

@ -16,7 +16,7 @@
* pthread_mutex_t myvar_mutex;
* atomicSet(myvar,12345);
*
* If atomic primitives are availble (tested in config.h) the mutex
* If atomic primitives are available (tested in config.h) the mutex
* is not used.
*
* Never use return value from the macros, instead use the AtomicGetIncr()

View File

@ -187,7 +187,7 @@ void *bioProcessBackgroundJobs(void *arg) {
if (type == BIO_CLOSE_FILE) {
close((long)job->arg1);
} else if (type == BIO_AOF_FSYNC) {
aof_fsync((long)job->arg1);
redis_fsync((long)job->arg1);
} else if (type == BIO_LAZY_FREE) {
/* What we free changes depending on what arguments are set:
* arg1 -> free the object at pointer.

View File

@ -918,7 +918,7 @@ void bitfieldCommand(client *c) {
struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */
int owtype = BFOVERFLOW_WRAP; /* Overflow type. */
int readonly = 1;
size_t higest_write_offset = 0;
size_t highest_write_offset = 0;
for (j = 2; j < c->argc; j++) {
int remargs = c->argc-j-1; /* Remaining args other than current. */
@ -968,8 +968,8 @@ void bitfieldCommand(client *c) {
if (opcode != BITFIELDOP_GET) {
readonly = 0;
if (higest_write_offset < bitoffset + bits - 1)
higest_write_offset = bitoffset + bits - 1;
if (highest_write_offset < bitoffset + bits - 1)
highest_write_offset = bitoffset + bits - 1;
/* INCRBY and SET require another argument. */
if (getLongLongFromObjectOrReply(c,c->argv[j+3],&i64,NULL) != C_OK){
zfree(ops);
@ -999,7 +999,7 @@ void bitfieldCommand(client *c) {
/* Lookup by making room up to the farest bit reached by
* this operation. */
if ((o = lookupStringForBitCommand(c,
higest_write_offset)) == NULL) return;
highest_write_offset)) == NULL) return;
}
addReplyMultiBulkLen(c,numops);

View File

@ -370,32 +370,46 @@ void handleClientsBlockedOnKeys(void) {
if (receiver->btype != BLOCKED_STREAM) continue;
streamID *gt = dictFetchValue(receiver->bpop.keys,
rl->key);
if (s->last_id.ms > gt->ms ||
(s->last_id.ms == gt->ms &&
s->last_id.seq > gt->seq))
{
/* If we blocked in the context of a consumer
* group, we need to resolve the group and update the
* last ID the client is blocked for: this is needed
* because serving other clients in the same consumer
* group will alter the "last ID" of the consumer
* group, and clients blocked in a consumer group are
* always blocked for the ">" ID: we need to deliver
* only new messages and avoid unblocking the client
* otherwise. */
streamCG *group = NULL;
if (receiver->bpop.xread_group) {
group = streamLookupCG(s,
receiver->bpop.xread_group->ptr);
/* If the group was not found, send an error
* to the consumer. */
if (!group) {
addReplyError(receiver,
"-NOGROUP the consumer group this client "
"was blocked on no longer exists");
unblockClient(receiver);
continue;
} else {
*gt = group->last_id;
}
}
if (streamCompareID(&s->last_id, gt) > 0) {
streamID start = *gt;
start.seq++; /* Can't overflow, it's an uint64_t */
/* If we blocked in the context of a consumer
* group, we need to resolve the group and
* consumer here. */
streamCG *group = NULL;
/* Lookup the consumer for the group, if any. */
streamConsumer *consumer = NULL;
if (receiver->bpop.xread_group) {
group = streamLookupCG(s,
receiver->bpop.xread_group->ptr);
/* In theory if the group is not found we
* just perform the read without the group,
* but actually when the group, or the key
* itself is deleted (triggering the removal
* of the group), we check for blocked clients
* and send them an error. */
}
int noack = 0;
if (group) {
consumer = streamLookupConsumer(group,
receiver->bpop.xread_consumer->ptr,
1);
noack = receiver->bpop.xread_group_noack;
}
/* Emit the two elements sub-array consisting of
@ -412,7 +426,7 @@ void handleClientsBlockedOnKeys(void) {
};
streamReplyWithRange(receiver,s,&start,NULL,
receiver->bpop.xread_count,
0, group, consumer, 0, &pi);
0, group, consumer, noack, &pi);
/* Note that after we unblock the client, 'gt'
* and other receiver->bpop stuff are no longer

View File

@ -3100,7 +3100,7 @@ void clusterHandleSlaveFailover(void) {
(unsigned long long) myself->configEpoch);
}
/* Take responsability for the cluster slots. */
/* Take responsibility for the cluster slots. */
clusterFailoverReplaceYourMaster();
} else {
clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_VOTES);
@ -3151,11 +3151,11 @@ void clusterHandleSlaveMigration(int max_slaves) {
!nodeTimedOut(mymaster->slaves[j])) okslaves++;
if (okslaves <= server.cluster_migration_barrier) return;
/* Step 3: Idenitfy a candidate for migration, and check if among the
/* Step 3: Identify a candidate for migration, and check if among the
* masters with the greatest number of ok slaves, I'm the one with the
* smallest node ID (the "candidate slave").
*
* Note: this means that eventually a replica migration will occurr
* Note: this means that eventually a replica migration will occur
* since slaves that are reachable again always have their FAIL flag
* cleared, so eventually there must be a candidate. At the same time
* this does not mean that there are no race conditions possible (two
@ -3736,7 +3736,7 @@ void clusterCloseAllSlots(void) {
* -------------------------------------------------------------------------- */
/* The following are defines that are only used in the evaluation function
* and are based on heuristics. Actaully the main point about the rejoin and
* and are based on heuristics. Actually the main point about the rejoin and
* writable delay is that they should be a few orders of magnitude larger
* than the network latency. */
#define CLUSTER_MAX_REJOIN_DELAY 5000
@ -4178,27 +4178,27 @@ void clusterCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"addslots <slot> [slot ...] -- Assign slots to current node.",
"bumpepoch -- Advance the cluster config epoch.",
"count-failure-reports <node-id> -- Return number of failure reports for <node-id>.",
"countkeysinslot <slot> - Return the number of keys in <slot>.",
"delslots <slot> [slot ...] -- Delete slots information from current node.",
"failover [force|takeover] -- Promote current slave node to being a master.",
"forget <node-id> -- Remove a node from the cluster.",
"getkeysinslot <slot> <count> -- Return key names stored by current node in a slot.",
"flushslots -- Delete current node own slots information.",
"info - Return onformation about the cluster.",
"keyslot <key> -- Return the hash slot for <key>.",
"meet <ip> <port> [bus-port] -- Connect nodes into a working cluster.",
"myid -- Return the node id.",
"nodes -- Return cluster configuration seen by node. Output format:",
"ADDSLOTS <slot> [slot ...] -- Assign slots to current node.",
"BUMPEPOCH -- Advance the cluster config epoch.",
"COUNT-failure-reports <node-id> -- Return number of failure reports for <node-id>.",
"COUNTKEYSINSLOT <slot> - Return the number of keys in <slot>.",
"DELSLOTS <slot> [slot ...] -- Delete slots information from current node.",
"FAILOVER [force|takeover] -- Promote current slave node to being a master.",
"FORGET <node-id> -- Remove a node from the cluster.",
"GETKEYSINSLOT <slot> <count> -- Return key names stored by current node in a slot.",
"FLUSHSLOTS -- Delete current node own slots information.",
"INFO - Return onformation about the cluster.",
"KEYSLOT <key> -- Return the hash slot for <key>.",
"MEET <ip> <port> [bus-port] -- Connect nodes into a working cluster.",
"MYID -- Return the node id.",
"NODES -- Return cluster configuration seen by node. Output format:",
" <id> <ip:port> <flags> <master> <pings> <pongs> <epoch> <link> <slot> ... <slot>",
"replicate <node-id> -- Configure current node as slave to <node-id>.",
"reset [hard|soft] -- Reset current node (default: soft).",
"set-config-epoch <epoch> - Set config epoch of current node.",
"setslot <slot> (importing|migrating|stable|node <node-id>) -- Set slot state.",
"slaves <node-id> -- Return <node-id> slaves.",
"slots -- Return information about slots range mappings. Each range is made of:",
"REPLICATE <node-id> -- Configure current node as slave to <node-id>.",
"RESET [hard|soft] -- Reset current node (default: soft).",
"SET-config-epoch <epoch> - Set config epoch of current node.",
"SETSLOT <slot> (importing|migrating|stable|node <node-id>) -- Set slot state.",
"SLAVES <node-id> -- Return <node-id> slaves.",
"SLOTS -- Return information about slots range mappings. Each range is made of:",
" start, end, master and replicas IP addresses, ports and ids",
NULL
};
@ -4746,8 +4746,7 @@ NULL
clusterReset(hard);
addReply(c,shared.ok);
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try CLUSTER HELP",
(char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
return;
}
}
@ -4835,15 +4834,39 @@ void dumpCommand(client *c) {
/* RESTORE key ttl serialized-value [REPLACE] */
void restoreCommand(client *c) {
long long ttl;
long long ttl, lfu_freq = -1, lru_idle = -1, lru_clock = -1;
rio payload;
int j, type, replace = 0;
int j, type, replace = 0, absttl = 0;
robj *obj;
/* Parse additional options */
for (j = 4; j < c->argc; j++) {
int additional = c->argc-j-1;
if (!strcasecmp(c->argv[j]->ptr,"replace")) {
replace = 1;
} else if (!strcasecmp(c->argv[j]->ptr,"absttl")) {
absttl = 1;
} else if (!strcasecmp(c->argv[j]->ptr,"idletime") && additional >= 1 &&
lfu_freq == -1)
{
if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lru_idle,NULL)
!= C_OK) return;
if (lru_idle < 0) {
addReplyError(c,"Invalid IDLETIME value, must be >= 0");
return;
}
lru_clock = LRU_CLOCK();
j++; /* Consume additional arg. */
} else if (!strcasecmp(c->argv[j]->ptr,"freq") && additional >= 1 &&
lru_idle == -1)
{
if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lfu_freq,NULL)
!= C_OK) return;
if (lfu_freq < 0 || lfu_freq > 255) {
addReplyError(c,"Invalid FREQ value, must be >= 0 and <= 255");
return;
}
j++; /* Consume additional arg. */
} else {
addReply(c,shared.syntaxerr);
return;
@ -4884,7 +4907,11 @@ void restoreCommand(client *c) {
/* Create the key and set the TTL if any */
dbAdd(c->db,c->argv[1],obj);
if (ttl) setExpire(c,c->db,c->argv[1],mstime()+ttl);
if (ttl) {
if (!absttl) ttl+=mstime();
setExpire(c,c->db,c->argv[1],ttl);
}
objectSetLRUOrLFU(obj,lfu_freq,lru_idle,lru_clock);
signalModifiedKey(c->db,c->argv[1]);
addReply(c,shared.ok);
server.dirty++;
@ -5557,7 +5584,7 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
if (error_code == CLUSTER_REDIR_CROSS_SLOT) {
addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
} else if (error_code == CLUSTER_REDIR_UNSTABLE) {
/* The request spawns mutliple keys in the same slot,
/* The request spawns multiple keys in the same slot,
* but the slot is not "stable" currently as there is
* a migration or import in progress. */
addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
@ -5589,7 +5616,11 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
* longer handles, the client is sent a redirection error, and the function
* returns 1. Otherwise 0 is returned and no operation is performed. */
int clusterRedirectBlockedClientIfNeeded(client *c) {
if (c->flags & CLIENT_BLOCKED && c->btype == BLOCKED_LIST) {
if (c->flags & CLIENT_BLOCKED &&
(c->btype == BLOCKED_LIST ||
c->btype == BLOCKED_ZSET ||
c->btype == BLOCKED_STREAM))
{
dictEntry *de;
dictIterator *di;

View File

@ -243,7 +243,7 @@ union clusterMsgData {
#define CLUSTER_PROTO_VER 1 /* Cluster bus protocol version. */
typedef struct {
char sig[4]; /* Siganture "RCmb" (Redis Cluster message bus). */
char sig[4]; /* Signature "RCmb" (Redis Cluster message bus). */
uint32_t totlen; /* Total length of this message */
uint16_t ver; /* Protocol version, currently set to 1. */
uint16_t port; /* TCP base port number. */

View File

@ -390,7 +390,7 @@ void loadServerConfigFromString(char *config) {
}
} else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
zfree(server.masterauth);
server.masterauth = zstrdup(argv[1]);
server.masterauth = argv[1][0] ? zstrdup(argv[1]) : NULL;
} else if (!strcasecmp(argv[0],"slave-serve-stale-data") && argc == 2) {
if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@ -431,6 +431,11 @@ void loadServerConfigFromString(char *config) {
if ((server.active_defrag_enabled = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
if (server.active_defrag_enabled) {
#ifndef HAVE_DEFRAG
err = "active defrag can't be enabled without proper jemalloc support"; goto loaderr;
#endif
}
} else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
if ((server.daemonize = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@ -483,6 +488,13 @@ void loadServerConfigFromString(char *config) {
yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"rdb-save-incremental-fsync") &&
argc == 2)
{
if ((server.rdb_save_incremental_fsync =
yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"aof-load-truncated") && argc == 2) {
if ((server.aof_load_truncated = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@ -496,7 +508,7 @@ void loadServerConfigFromString(char *config) {
err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN";
goto loaderr;
}
server.requirepass = zstrdup(argv[1]);
server.requirepass = argv[1][0] ? zstrdup(argv[1]) : NULL;
} else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
zfree(server.pidfile);
server.pidfile = zstrdup(argv[1]);
@ -1021,6 +1033,8 @@ void configSetCommand(client *c) {
"cluster-slave-no-failover",server.cluster_slave_no_failover) {
} config_set_bool_field(
"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) {
} config_set_bool_field(
"rdb-save-incremental-fsync",server.rdb_save_incremental_fsync) {
} config_set_bool_field(
"aof-load-truncated",server.aof_load_truncated) {
} config_set_bool_field(
@ -1062,15 +1076,15 @@ void configSetCommand(client *c) {
/* Numerical fields.
* config_set_numerical_field(name,var,min,max) */
} config_set_numerical_field(
"tcp-keepalive",server.tcpkeepalive,0,LLONG_MAX) {
"tcp-keepalive",server.tcpkeepalive,0,INT_MAX) {
} config_set_numerical_field(
"maxmemory-samples",server.maxmemory_samples,1,LLONG_MAX) {
"maxmemory-samples",server.maxmemory_samples,1,INT_MAX) {
} config_set_numerical_field(
"lfu-log-factor",server.lfu_log_factor,0,LLONG_MAX) {
"lfu-log-factor",server.lfu_log_factor,0,INT_MAX) {
} config_set_numerical_field(
"lfu-decay-time",server.lfu_decay_time,0,LLONG_MAX) {
"lfu-decay-time",server.lfu_decay_time,0,INT_MAX) {
} config_set_numerical_field(
"timeout",server.maxidletime,0,LONG_MAX) {
"timeout",server.maxidletime,0,INT_MAX) {
} config_set_numerical_field(
"active-defrag-threshold-lower",server.active_defrag_threshold_lower,0,1000) {
} config_set_numerical_field(
@ -1082,13 +1096,17 @@ void configSetCommand(client *c) {
} config_set_numerical_field(
"active-defrag-cycle-max",server.active_defrag_cycle_max,1,99) {
} config_set_numerical_field(
"active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,1,LLONG_MAX) {
"active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,1,LONG_MAX) {
} config_set_numerical_field(
"auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,LLONG_MAX){
"auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,INT_MAX){
} config_set_numerical_field(
"hash-max-ziplist-entries",server.hash_max_ziplist_entries,0,LLONG_MAX) {
"hash-max-ziplist-entries",server.hash_max_ziplist_entries,0,LONG_MAX) {
} config_set_numerical_field(
"hash-max-ziplist-value",server.hash_max_ziplist_value,0,LLONG_MAX) {
"hash-max-ziplist-value",server.hash_max_ziplist_value,0,LONG_MAX) {
} config_set_numerical_field(
"stream-node-max-bytes",server.stream_node_max_bytes,0,LONG_MAX) {
} config_set_numerical_field(
"stream-node-max-entries",server.stream_node_max_entries,0,LLONG_MAX) {
} config_set_numerical_field(
"stream-node-max-bytes",server.stream_node_max_bytes,0,LLONG_MAX) {
} config_set_numerical_field(
@ -1098,40 +1116,40 @@ void configSetCommand(client *c) {
} config_set_numerical_field(
"list-compress-depth",server.list_compress_depth,0,INT_MAX) {
} config_set_numerical_field(
"set-max-intset-entries",server.set_max_intset_entries,0,LLONG_MAX) {
"set-max-intset-entries",server.set_max_intset_entries,0,LONG_MAX) {
} config_set_numerical_field(
"zset-max-ziplist-entries",server.zset_max_ziplist_entries,0,LLONG_MAX) {
"zset-max-ziplist-entries",server.zset_max_ziplist_entries,0,LONG_MAX) {
} config_set_numerical_field(
"zset-max-ziplist-value",server.zset_max_ziplist_value,0,LLONG_MAX) {
"zset-max-ziplist-value",server.zset_max_ziplist_value,0,LONG_MAX) {
} config_set_numerical_field(
"hll-sparse-max-bytes",server.hll_sparse_max_bytes,0,LLONG_MAX) {
"hll-sparse-max-bytes",server.hll_sparse_max_bytes,0,LONG_MAX) {
} config_set_numerical_field(
"lua-time-limit",server.lua_time_limit,0,LLONG_MAX) {
"lua-time-limit",server.lua_time_limit,0,LONG_MAX) {
} config_set_numerical_field(
"slowlog-log-slower-than",server.slowlog_log_slower_than,0,LLONG_MAX) {
"slowlog-log-slower-than",server.slowlog_log_slower_than,-1,LLONG_MAX) {
} config_set_numerical_field(
"slowlog-max-len",ll,0,LLONG_MAX) {
"slowlog-max-len",ll,0,LONG_MAX) {
/* Cast to unsigned. */
server.slowlog_max_len = (unsigned long)ll;
} config_set_numerical_field(
"latency-monitor-threshold",server.latency_monitor_threshold,0,LLONG_MAX){
} config_set_numerical_field(
"repl-ping-slave-period",server.repl_ping_slave_period,1,LLONG_MAX) {
"repl-ping-slave-period",server.repl_ping_slave_period,1,INT_MAX) {
} config_set_numerical_field(
"repl-timeout",server.repl_timeout,1,LLONG_MAX) {
"repl-timeout",server.repl_timeout,1,INT_MAX) {
} config_set_numerical_field(
"repl-backlog-ttl",server.repl_backlog_time_limit,0,LLONG_MAX) {
"repl-backlog-ttl",server.repl_backlog_time_limit,0,LONG_MAX) {
} config_set_numerical_field(
"repl-diskless-sync-delay",server.repl_diskless_sync_delay,0,LLONG_MAX) {
"repl-diskless-sync-delay",server.repl_diskless_sync_delay,0,INT_MAX) {
} config_set_numerical_field(
"slave-priority",server.slave_priority,0,LLONG_MAX) {
"slave-priority",server.slave_priority,0,INT_MAX) {
} config_set_numerical_field(
"slave-announce-port",server.slave_announce_port,0,65535) {
} config_set_numerical_field(
"min-slaves-to-write",server.repl_min_slaves_to_write,0,LLONG_MAX) {
"min-slaves-to-write",server.repl_min_slaves_to_write,0,INT_MAX) {
refreshGoodSlavesCount();
} config_set_numerical_field(
"min-slaves-max-lag",server.repl_min_slaves_max_lag,0,LLONG_MAX) {
"min-slaves-max-lag",server.repl_min_slaves_max_lag,0,INT_MAX) {
refreshGoodSlavesCount();
} config_set_numerical_field(
"cluster-node-timeout",server.cluster_node_timeout,0,LLONG_MAX) {
@ -1140,17 +1158,17 @@ void configSetCommand(client *c) {
} config_set_numerical_field(
"cluster-announce-bus-port",server.cluster_announce_bus_port,0,65535) {
} config_set_numerical_field(
"cluster-migration-barrier",server.cluster_migration_barrier,0,LLONG_MAX){
"cluster-migration-barrier",server.cluster_migration_barrier,0,INT_MAX){
} config_set_numerical_field(
"cluster-slave-validity-factor",server.cluster_slave_validity_factor,0,LLONG_MAX) {
"cluster-slave-validity-factor",server.cluster_slave_validity_factor,0,INT_MAX) {
} config_set_numerical_field(
"hz",server.hz,0,LLONG_MAX) {
"hz",server.hz,0,INT_MAX) {
/* Hz is more an hint from the user, so we accept values out of range
* but cap them to reasonable values. */
if (server.hz < CONFIG_MIN_HZ) server.hz = CONFIG_MIN_HZ;
if (server.hz > CONFIG_MAX_HZ) server.hz = CONFIG_MAX_HZ;
} config_set_numerical_field(
"watchdog-period",ll,0,LLONG_MAX) {
"watchdog-period",ll,0,INT_MAX) {
if (ll)
enableWatchdog(ll);
else
@ -1347,6 +1365,8 @@ void configGetCommand(client *c) {
server.repl_diskless_sync);
config_get_bool_field("aof-rewrite-incremental-fsync",
server.aof_rewrite_incremental_fsync);
config_get_bool_field("rdb-save-incremental-fsync",
server.rdb_save_incremental_fsync);
config_get_bool_field("aof-load-truncated",
server.aof_load_truncated);
config_get_bool_field("aof-use-rdb-preamble",
@ -2084,6 +2104,7 @@ int rewriteConfig(char *path) {
rewriteConfigClientoutputbufferlimitOption(state);
rewriteConfigNumericalOption(state,"hz",server.hz,CONFIG_DEFAULT_HZ);
rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC);
rewriteConfigYesNoOption(state,"rdb-save-incremental-fsync",server.rdb_save_incremental_fsync,CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC);
rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,CONFIG_DEFAULT_AOF_LOAD_TRUNCATED);
rewriteConfigYesNoOption(state,"aof-use-rdb-preamble",server.aof_use_rdb_preamble,CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE);
rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE);
@ -2123,10 +2144,10 @@ void configCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"get <pattern> -- Return parameters matching the glob-like <pattern> and their values.",
"set <parameter> <value> -- Set parameter to value.",
"resetstat -- Reset statistics reported by INFO.",
"rewrite -- Rewrite the configuration file.",
"GET <pattern> -- Return parameters matching the glob-like <pattern> and their values.",
"SET <parameter> <value> -- Set parameter to value.",
"RESETSTAT -- Reset statistics reported by INFO.",
"REWRITE -- Rewrite the configuration file.",
NULL
};
addReplyHelp(c, help);
@ -2151,8 +2172,7 @@ NULL
addReply(c,shared.ok);
}
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try CONFIG HELP",
(char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
return;
}
}

View File

@ -87,11 +87,11 @@
#endif
#endif
/* Define aof_fsync to fdatasync() in Linux and fsync() for all the rest */
/* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest */
#ifdef __linux__
#define aof_fsync fdatasync
#define redis_fsync fdatasync
#else
#define aof_fsync fsync
#define redis_fsync fsync
#endif
/* Define rdb_fsync_range to sync_file_range() on Linux, otherwise we use

View File

@ -90,7 +90,7 @@ robj *lookupKey(redisDb *db, robj *key, int flags) {
* LOOKUP_NONE (or zero): no special flags are passed.
* LOOKUP_NOTOUCH: don't alter the last access time of the key.
*
* Note: this function also returns NULL is the key is logically expired
* Note: this function also returns NULL if the key is logically expired
* but still existing, in case this is a slave, since this API is called only
* for read operations. Even if the key expiry is master-driven, we can
* correctly report a key is expired on slaves even if the master is lagging
@ -113,7 +113,7 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
* safety measure, the command invoked is a read-only command, we can
* safely return NULL here, and provide a more consistent behavior
* to clients accessign expired values in a read-only fashion, that
* will say the key as non exisitng.
* will say the key as non existing.
*
* Notably this covers GETs when slaves are used to scale reads. */
if (server.current_client &&
@ -223,6 +223,8 @@ int dbExists(redisDb *db, robj *key) {
* The function makes sure to return keys not already expired. */
robj *dbRandomKey(redisDb *db) {
dictEntry *de;
int maxtries = 100;
int allvolatile = dictSize(db->dict) == dictSize(db->expires);
while(1) {
sds key;
@ -234,6 +236,17 @@ robj *dbRandomKey(redisDb *db) {
key = dictGetKey(de);
keyobj = createStringObject(key,sdslen(key));
if (dictFind(db->expires,key)) {
if (allvolatile && server.masterhost && --maxtries == 0) {
/* If the DB is composed only of keys with an expire set,
* it could happen that all the keys are already logically
* expired in the slave, so the function cannot stop because
* expireIfNeeded() is false, nor it can stop because
* dictGetRandomKey() returns NULL (there are keys to return).
* To prevent the infinite loop we do some tries, but if there
* are the conditions for an infinite loop, eventually we
* return a key name that may be already expired. */
return keyobj;
}
if (expireIfNeeded(db,keyobj)) {
decrRefCount(keyobj);
continue; /* search for another key. This expired. */
@ -305,7 +318,7 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) {
* If callback is given the function is called from time to time to
* signal that work is in progress.
*
* The dbnum can be -1 if all teh DBs should be flushed, or the specified
* The dbnum can be -1 if all the DBs should be flushed, or the specified
* DB number if we want to flush only a single Redis database number.
*
* Flags are be EMPTYDB_NO_FLAGS if no special flags are specified or
@ -467,8 +480,7 @@ void existsCommand(client *c) {
int j;
for (j = 1; j < c->argc; j++) {
expireIfNeeded(c->db,c->argv[j]);
if (dbExists(c->db,c->argv[j])) count++;
if (lookupKeyRead(c->db,c->argv[j])) count++;
}
addReplyLongLong(c,count);
}
@ -1173,7 +1185,7 @@ int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, in
for (j = cmd->firstkey; j <= last; j += cmd->keystep) {
if (j >= argc) {
/* Modules commands, and standard commands with a not fixed number
* of arugments (negative arity parameter) do not have dispatch
* of arguments (negative arity parameter) do not have dispatch
* time arity checks, so we need to handle the case where the user
* passed an invalid number of arguments here. In this case we
* return no keys and expect the command implementation to report
@ -1228,7 +1240,7 @@ int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *nu
num = atoi(argv[2]->ptr);
/* Sanity check. Don't return any key if the command is going to
* reply with syntax error. */
if (num > (argc-3)) {
if (num < 1 || num > (argc-3)) {
*numkeys = 0;
return NULL;
}
@ -1257,7 +1269,7 @@ int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys)
num = atoi(argv[2]->ptr);
/* Sanity check. Don't return any key if the command is going to
* reply with syntax error. */
if (num > (argc-3)) {
if (num <= 0 || num > (argc-3)) {
*numkeys = 0;
return NULL;
}
@ -1386,23 +1398,37 @@ int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numk
}
/* XREAD [BLOCK <milliseconds>] [COUNT <count>] [GROUP <groupname> <ttl>]
* [RETRY <milliseconds> <ttl>] STREAMS key_1 key_2 ... key_N
* ID_1 ID_2 ... ID_N */
* STREAMS key_1 key_2 ... key_N ID_1 ID_2 ... ID_N */
int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
int i, num, *keys;
int i, num = 0, *keys;
UNUSED(cmd);
/* We need to seek the last argument that contains "STREAMS", because other
* arguments before may contain it (for example the group name). */
/* We need to parse the options of the command in order to seek the first
* "STREAMS" string which is actually the option. This is needed because
* "STREAMS" could also be the name of the consumer group and even the
* name of the stream key. */
int streams_pos = -1;
for (i = 1; i < argc; i++) {
char *arg = argv[i]->ptr;
if (!strcasecmp(arg, "streams")) streams_pos = i;
if (!strcasecmp(arg, "block")) {
i++; /* Skip option argument. */
} else if (!strcasecmp(arg, "count")) {
i++; /* Skip option argument. */
} else if (!strcasecmp(arg, "group")) {
i += 2; /* Skip option argument. */
} else if (!strcasecmp(arg, "noack")) {
/* Nothing to do. */
} else if (!strcasecmp(arg, "streams")) {
streams_pos = i;
break;
} else {
break; /* Syntax error. */
}
}
if (streams_pos != -1) num = argc - streams_pos - 1;
/* Syntax error. */
if (streams_pos == -1 || num % 2 != 0) {
if (streams_pos == -1 || num == 0 || num % 2 != 0) {
*numkeys = 0;
return NULL;
}
@ -1410,7 +1436,7 @@ int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys)
there are also the IDs, one per key. */
keys = zmalloc(sizeof(int) * num);
for (i = streams_pos+1; i < argc; i++) keys[i-streams_pos-1] = i;
for (i = streams_pos+1; i < argc-num; i++) keys[i-streams_pos-1] = i;
*numkeys = num;
return keys;
}

View File

@ -285,26 +285,26 @@ void computeDatasetDigest(unsigned char *final) {
void debugCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"assert -- Crash by assertion failed.",
"change-repl-id -- Change the replication IDs of the instance. Dangerous, should be used only for testing the replication subsystem.",
"crash-and-recover <milliseconds> -- Hard crash and restart after <milliseconds> delay.",
"digest -- Outputs an hex signature representing the current DB content.",
"htstats <dbid> -- Return hash table statistics of the specified Redis database.",
"htstats-key <key> -- Like htstats but for the hash table stored as key's value.",
"loadaof -- Flush the AOF buffers on disk and reload the AOF in memory.",
"lua-always-replicate-commands (0|1) -- Setting it to 1 makes Lua replication defaulting to replicating single commands, without the script having to enable effects replication.",
"object <key> -- Show low level info about key and associated value.",
"panic -- Crash the server simulating a panic.",
"populate <count> [prefix] [size] -- Create <count> string keys named key:<num>. If a prefix is specified is used instead of the 'key' prefix.",
"reload -- Save the RDB on disk and reload it back in memory.",
"restart -- Graceful restart: save config, db, restart.",
"sdslen <key> -- Show low level SDS string info representing key and value.",
"segfault -- Crash the server with sigsegv.",
"set-active-expire (0|1) -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default.",
"sleep <seconds> -- Stop the server for <seconds>. Decimals allowed.",
"structsize -- Return the size of different Redis core C structures.",
"ziplist <key> -- Show low level info about the ziplist encoding.",
"error <string> -- Return a Redis protocol error with <string> as message. Useful for clients unit tests to simulate Redis errors.",
"ASSERT -- Crash by assertion failed.",
"CHANGE-REPL-ID -- Change the replication IDs of the instance. Dangerous, should be used only for testing the replication subsystem.",
"CRASH-AND-RECOVER <milliseconds> -- Hard crash and restart after <milliseconds> delay.",
"DIGEST -- Output a hex signature representing the current DB content.",
"ERROR <string> -- Return a Redis protocol error with <string> as message. Useful for clients unit tests to simulate Redis errors.",
"HTSTATS <dbid> -- Return hash table statistics of the specified Redis database.",
"HTSTATS-KEY <key> -- Like htstats but for the hash table stored as key's value.",
"LOADAOF -- Flush the AOF buffers on disk and reload the AOF in memory.",
"LUA-ALWAYS-REPLICATE-COMMANDS <0|1> -- Setting it to 1 makes Lua replication defaulting to replicating single commands, without the script having to enable effects replication.",
"OBJECT <key> -- Show low level info about key and associated value.",
"PANIC -- Crash the server simulating a panic.",
"POPULATE <count> [prefix] [size] -- Create <count> string keys named key:<num>. If a prefix is specified is used instead of the 'key' prefix.",
"RELOAD -- Save the RDB on disk and reload it back in memory.",
"RESTART -- Graceful restart: save config, db, restart.",
"SDSLEN <key> -- Show low level SDS string info representing key and value.",
"SEGFAULT -- Crash the server with sigsegv.",
"SET-ACTIVE-EXPIRE <0|1> -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default.",
"SLEEP <seconds> -- Stop the server for <seconds>. Decimals allowed.",
"STRUCTSIZE -- Return the size of different Redis core C structures.",
"ZIPLIST <key> -- Show low level info about the ziplist encoding.",
NULL
};
addReplyHelp(c, help);
@ -348,7 +348,7 @@ NULL
serverLog(LL_WARNING,"DB reloaded by DEBUG RELOAD");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
if (server.aof_state == AOF_ON) flushAppendOnlyFile(1);
if (server.aof_state != AOF_OFF) flushAppendOnlyFile(1);
emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
if (loadAppendOnlyFile(server.aof_filename) != C_OK) {
addReply(c,shared.err);
@ -582,8 +582,7 @@ NULL
clearReplicationId2();
addReply(c,shared.ok);
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try DEBUG HELP",
(char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
return;
}
}
@ -1077,7 +1076,7 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
infostring = genRedisInfoString("all");
serverLogRaw(LL_WARNING|LL_RAW, infostring);
serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n");
clients = getAllClientsInfoString();
clients = getAllClientsInfoString(-1);
serverLogRaw(LL_WARNING|LL_RAW, clients);
sdsfree(infostring);
sdsfree(clients);

View File

@ -592,6 +592,171 @@ long defragSet(redisDb *db, dictEntry *kde) {
return defragged;
}
/* Defrag callback for radix tree iterator, called for each node,
* used in order to defrag the nodes allocations. */
int defragRaxNode(raxNode **noderef) {
raxNode *newnode = activeDefragAlloc(*noderef);
if (newnode) {
*noderef = newnode;
return 1;
}
return 0;
}
/* returns 0 if no more work needs to be been done, and 1 if time is up and more work is needed. */
int scanLaterStraemListpacks(robj *ob, unsigned long *cursor, long long endtime, long long *defragged) {
static unsigned char last[sizeof(streamID)];
raxIterator ri;
long iterations = 0;
if (ob->type != OBJ_STREAM || ob->encoding != OBJ_ENCODING_STREAM) {
*cursor = 0;
return 0;
}
stream *s = ob->ptr;
raxStart(&ri,s->rax);
if (*cursor == 0) {
/* if cursor is 0, we start new iteration */
defragRaxNode(&s->rax->head);
/* assign the iterator node callback before the seek, so that the
* initial nodes that are processed till the first item are covered */
ri.node_cb = defragRaxNode;
raxSeek(&ri,"^",NULL,0);
} else {
/* if cursor is non-zero, we seek to the static 'last' */
if (!raxSeek(&ri,">", last, sizeof(last))) {
*cursor = 0;
return 0;
}
/* assign the iterator node callback after the seek, so that the
* initial nodes that are processed till now aren't covered */
ri.node_cb = defragRaxNode;
}
(*cursor)++;
while (raxNext(&ri)) {
void *newdata = activeDefragAlloc(ri.data);
if (newdata)
raxSetData(ri.node, ri.data=newdata), (*defragged)++;
if (++iterations > 16) {
if (ustime() > endtime) {
serverAssert(ri.key_len==sizeof(last));
memcpy(last,ri.key,ri.key_len);
raxStop(&ri);
return 1;
}
iterations = 0;
}
}
raxStop(&ri);
*cursor = 0;
return 0;
}
/* optional callback used defrag each rax element (not including the element pointer itself) */
typedef void *(raxDefragFunction)(raxIterator *ri, void *privdata, long *defragged);
/* defrag radix tree including:
* 1) rax struct
* 2) rax nodes
* 3) rax entry data (only if defrag_data is specified)
* 4) call a callback per element, and allow the callback to return a new pointer for the element */
long defragRadixTree(rax **raxref, int defrag_data, raxDefragFunction *element_cb, void *element_cb_data) {
long defragged = 0;
raxIterator ri;
rax* rax;
if ((rax = activeDefragAlloc(*raxref)))
defragged++, *raxref = rax;
rax = *raxref;
raxStart(&ri,rax);
ri.node_cb = defragRaxNode;
defragRaxNode(&rax->head);
raxSeek(&ri,"^",NULL,0);
while (raxNext(&ri)) {
void *newdata = NULL;
if (element_cb)
newdata = element_cb(&ri, element_cb_data, &defragged);
if (defrag_data && !newdata)
newdata = activeDefragAlloc(ri.data);
if (newdata)
raxSetData(ri.node, ri.data=newdata), defragged++;
}
raxStop(&ri);
return defragged;
}
typedef struct {
streamCG *cg;
streamConsumer *c;
} PendingEntryContext;
void* defragStreamConsumerPendingEntry(raxIterator *ri, void *privdata, long *defragged) {
UNUSED(defragged);
PendingEntryContext *ctx = privdata;
streamNACK *nack = ri->data, *newnack;
nack->consumer = ctx->c; /* update nack pointer to consumer */
newnack = activeDefragAlloc(nack);
if (newnack) {
/* update consumer group pointer to the nack */
void *prev;
raxInsert(ctx->cg->pel, ri->key, ri->key_len, newnack, &prev);
serverAssert(prev==nack);
/* note: we don't increment 'defragged' that's done by the caller */
}
return newnack;
}
void* defragStreamConsumer(raxIterator *ri, void *privdata, long *defragged) {
streamConsumer *c = ri->data;
streamCG *cg = privdata;
void *newc = activeDefragAlloc(c);
if (newc) {
/* note: we don't increment 'defragged' that's done by the caller */
c = newc;
}
sds newsds = activeDefragSds(c->name);
if (newsds)
(*defragged)++, c->name = newsds;
if (c->pel) {
PendingEntryContext pel_ctx = {cg, c};
*defragged += defragRadixTree(&c->pel, 0, defragStreamConsumerPendingEntry, &pel_ctx);
}
return newc; /* returns NULL if c was not defragged */
}
void* defragStreamConsumerGroup(raxIterator *ri, void *privdata, long *defragged) {
streamCG *cg = ri->data;
UNUSED(privdata);
if (cg->consumers)
*defragged += defragRadixTree(&cg->consumers, 0, defragStreamConsumer, cg);
if (cg->pel)
*defragged += defragRadixTree(&cg->pel, 0, NULL, NULL);
return NULL;
}
long defragStream(redisDb *db, dictEntry *kde) {
long defragged = 0;
robj *ob = dictGetVal(kde);
serverAssert(ob->type == OBJ_STREAM && ob->encoding == OBJ_ENCODING_STREAM);
stream *s = ob->ptr, *news;
/* handle the main struct */
if ((news = activeDefragAlloc(s)))
defragged++, ob->ptr = s = news;
if (raxSize(s->rax) > server.active_defrag_max_scan_fields) {
rax *newrax = activeDefragAlloc(s->rax);
if (newrax)
defragged++, s->rax = newrax;
defragLater(db, kde);
} else
defragged += defragRadixTree(&s->rax, 1, NULL, NULL);
if (s->cgroups)
defragged += defragRadixTree(&s->cgroups, 1, defragStreamConsumerGroup, NULL);
return defragged;
}
/* for each key we scan in the main dict, this function will attempt to defrag
* all the various pointers it has. Returns a stat of how many pointers were
* moved. */
@ -660,6 +825,8 @@ long defragKey(redisDb *db, dictEntry *de) {
} else {
serverPanic("Unknown hash encoding");
}
} else if (ob->type == OBJ_STREAM) {
defragged += defragStream(db, de);
} else if (ob->type == OBJ_MODULE) {
/* Currently defragmenting modules private data types
* is not supported. */
@ -680,7 +847,7 @@ void defragScanCallback(void *privdata, const dictEntry *de) {
server.stat_active_defrag_scanned++;
}
/* Defrag scan callback for for each hash table bicket,
/* Defrag scan callback for each hash table bicket,
* used in order to defrag the dictEntry allocations. */
void defragDictBucketCallback(void *privdata, dictEntry **bucketref) {
UNUSED(privdata); /* NOTE: this function is also used by both activeDefragCycle and scanLaterHash, etc. don't use privdata */
@ -700,9 +867,8 @@ void defragDictBucketCallback(void *privdata, dictEntry **bucketref) {
* or not, a false detection can cause the defragmenter to waste a lot of CPU
* without the possibility of getting any results. */
float getAllocatorFragmentation(size_t *out_frag_bytes) {
size_t resident = server.cron_malloc_stats.allocator_resident;
size_t active = server.cron_malloc_stats.allocator_active;
size_t allocated = server.cron_malloc_stats.allocator_allocated;
size_t resident, active, allocated;
zmalloc_get_allocator_info(&allocated, &active, &resident);
float frag_pct = ((float)active / allocated)*100 - 100;
size_t frag_bytes = active - allocated;
float rss_pct = ((float)resident / allocated)*100 - 100;
@ -728,27 +894,29 @@ long defragOtherGlobals() {
return defragged;
}
unsigned long defragLaterItem(dictEntry *de, unsigned long cursor) {
long defragged = 0;
/* returns 0 more work may or may not be needed (see non-zero cursor),
* and 1 if time is up and more work is needed. */
int defragLaterItem(dictEntry *de, unsigned long *cursor, long long endtime) {
if (de) {
robj *ob = dictGetVal(de);
if (ob->type == OBJ_LIST) {
defragged += scanLaterList(ob);
cursor = 0; /* list has no scan, we must finish it in one go */
server.stat_active_defrag_hits += scanLaterList(ob);
*cursor = 0; /* list has no scan, we must finish it in one go */
} else if (ob->type == OBJ_SET) {
defragged += scanLaterSet(ob, &cursor);
server.stat_active_defrag_hits += scanLaterSet(ob, cursor);
} else if (ob->type == OBJ_ZSET) {
defragged += scanLaterZset(ob, &cursor);
server.stat_active_defrag_hits += scanLaterZset(ob, cursor);
} else if (ob->type == OBJ_HASH) {
defragged += scanLaterHash(ob, &cursor);
server.stat_active_defrag_hits += scanLaterHash(ob, cursor);
} else if (ob->type == OBJ_STREAM) {
return scanLaterStraemListpacks(ob, cursor, endtime, &server.stat_active_defrag_hits);
} else {
cursor = 0; /* object type may have changed since we schedule it for later */
*cursor = 0; /* object type may have changed since we schedule it for later */
}
} else {
cursor = 0; /* object may have been deleted already */
*cursor = 0; /* object may have been deleted already */
}
server.stat_active_defrag_hits += defragged;
return cursor;
return 0;
}
/* returns 0 if no more work needs to be been done, and 1 if time is up and more work is needed. */
@ -788,17 +956,22 @@ int defragLaterStep(redisDb *db, long long endtime) {
dictEntry *de = dictFind(db->dict, current_key);
key_defragged = server.stat_active_defrag_hits;
do {
cursor = defragLaterItem(de, cursor);
int quit = 0;
if (defragLaterItem(de, &cursor, endtime))
quit = 1; /* time is up, we didn't finish all the work */
/* Don't start a new BIG key in this loop, this is because the
* next key can be a list, and scanLaterList must be done in once cycle */
if (!cursor)
quit = 1;
/* Once in 16 scan iterations, 512 pointer reallocations, or 64 fields
* (if we have a lot of pointers in one hash bucket, or rehashing),
* check if we reached the time limit.
* But regardless, don't start a new BIG key in this loop, this is because the
* next key can be a list, and scanLaterList must be done in once cycle */
if (!cursor || (++iterations > 16 ||
* check if we reached the time limit. */
if (quit || (++iterations > 16 ||
server.stat_active_defrag_hits - prev_defragged > 512 ||
server.stat_active_defrag_scanned - prev_scanned > 64)) {
if (!cursor || ustime() > endtime) {
if (quit || ustime() > endtime) {
if(key_defragged != server.stat_active_defrag_hits)
server.stat_active_defrag_key_hits++;
else

View File

@ -327,7 +327,7 @@ int dictReplace(dict *d, void *key, void *val)
dictEntry *entry, *existing, auxentry;
/* Try to add the element. If the key
* does not exists dictAdd will suceed. */
* does not exists dictAdd will succeed. */
entry = dictAddRaw(d,key,&existing);
if (entry) {
dictSetVal(d, entry, val);
@ -705,8 +705,10 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
* table, there will be no elements in both tables up to
* the current rehashing index, so we jump if possible.
* (this happens when going from big to small table). */
if (i >= d->ht[1].size) i = d->rehashidx;
continue;
if (i >= d->ht[1].size)
i = d->rehashidx;
else
continue;
}
if (i >= d->ht[j].size) continue; /* Out of range for this table. */
dictEntry *he = d->ht[j].table[i];

View File

@ -43,12 +43,12 @@ uint16_t intrev16(uint16_t v);
uint32_t intrev32(uint32_t v);
uint64_t intrev64(uint64_t v);
/* variants of the function doing the actual convertion only if the target
/* variants of the function doing the actual conversion only if the target
* host is big endian */
#if (BYTE_ORDER == LITTLE_ENDIAN)
#define memrev16ifbe(p)
#define memrev32ifbe(p)
#define memrev64ifbe(p)
#define memrev16ifbe(p) ((void)(0))
#define memrev32ifbe(p) ((void)(0))
#define memrev64ifbe(p) ((void)(0))
#define intrev16ifbe(v) (v)
#define intrev32ifbe(v) (v)
#define intrev64ifbe(v) (v)

View File

@ -112,7 +112,7 @@ void activeExpireCycle(int type) {
if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
/* Don't start a fast cycle if the previous cycle did not exit
* for time limt. Also don't repeat a fast cycle for the same period
* for time limit. Also don't repeat a fast cycle for the same period
* as the fast cycle total duration itself. */
if (!timelimit_exit) return;
if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return;

View File

@ -145,7 +145,7 @@ double extractUnitOrReply(client *c, robj *unit) {
/* Input Argument Helper.
* Extract the dinstance from the specified two arguments starting at 'argv'
* that shouldbe in the form: <number> <unit> and return the dinstance in the
* specified unit on success. *conversino is populated with the coefficient
* specified unit on success. *conversions is populated with the coefficient
* to use in order to convert meters to the unit.
*
* On error a value less than zero is returned. */

View File

@ -144,8 +144,8 @@ int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
(longitude - long_range->min) / (long_range->max - long_range->min);
/* convert to fixed point based on the step size */
lat_offset *= (1 << step);
long_offset *= (1 << step);
lat_offset *= (1ULL << step);
long_offset *= (1ULL << step);
hash->bits = interleave64(lat_offset, long_offset);
return 1;
}

View File

@ -975,7 +975,7 @@ struct commandHelp {
"5.0.0" },
{ "XPENDING",
"key group [start end count] [consumer]",
"Return information and entries from a stream conusmer group pending entries list, that are messages fetched but never acknowledged.",
"Return information and entries from a stream consumer group pending entries list, that are messages fetched but never acknowledged.",
14,
"5.0.0" },
{ "XRANGE",

View File

@ -673,7 +673,7 @@ int hllSparseSet(robj *o, long index, uint8_t count) {
end = p + sdslen(o->ptr) - HLL_HDR_SIZE;
first = 0;
prev = NULL; /* Points to previos opcode at the end of the loop. */
prev = NULL; /* Points to previous opcode at the end of the loop. */
next = NULL; /* Points to the next opcode at the end of the loop. */
span = 0;
while(p < end) {
@ -764,7 +764,7 @@ int hllSparseSet(robj *o, long index, uint8_t count) {
* and is either currently represented by a VAL opcode with len > 1,
* by a ZERO opcode with len > 1, or by an XZERO opcode.
*
* In those cases the original opcode must be split into muliple
* In those cases the original opcode must be split into multiple
* opcodes. The worst case is an XZERO split in the middle resuling into
* XZERO - VAL - XZERO, so the resulting sequence max length is
* 5 bytes.
@ -887,7 +887,7 @@ promote: /* Promote to dense representation. */
*
* Note that this in turn means that PFADD will make sure the command
* is propagated to slaves / AOF, so if there is a sparse -> dense
* convertion, it will be performed in all the slaves as well. */
* conversion, it will be performed in all the slaves as well. */
int dense_retval = hllDenseSet(hdr->registers,index,count);
serverAssert(dense_retval == 1);
return dense_retval;

View File

@ -152,7 +152,7 @@ int latencyResetEvent(char *event_to_reset) {
/* ------------------------ Latency reporting (doctor) ---------------------- */
/* Analyze the samples avaialble for a given event and return a structure
/* Analyze the samples available for a given event and return a structure
* populate with different metrics, average, MAD, min, max, and so forth.
* Check latency.h definition of struct latenctStat for more info.
* If the specified event has no elements the structure is populate with
@ -294,7 +294,7 @@ sds createLatencyReport(void) {
/* Potentially commands. */
if (!strcasecmp(event,"command")) {
if (server.slowlog_log_slower_than == 0) {
if (server.slowlog_log_slower_than < 0) {
advise_slowlog_enabled = 1;
advices++;
} else if (server.slowlog_log_slower_than/1000 >

View File

@ -23,10 +23,10 @@ size_t lazyfreeGetPendingObjectsCount(void) {
* the function just returns the number of elements the object is composed of.
*
* Objects composed of single allocations are always reported as having a
* single item even if they are actaully logical composed of multiple
* single item even if they are actually logical composed of multiple
* elements.
*
* For lists the funciton returns the number of elements in the quicklist
* For lists the function returns the number of elements in the quicklist
* representing the list. */
size_t lazyfreeGetFreeEffort(robj *obj) {
if (obj->type == OBJ_LIST) {

View File

@ -291,7 +291,7 @@ int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, ui
/* Store a reverse-encoded variable length field, representing the length
* of the previous element of size 'l', in the target buffer 'buf'.
* The function returns the number of bytes used to encode it, from
* 1 to 5. If 'buf' is NULL the funciton just returns the number of bytes
* 1 to 5. If 'buf' is NULL the function just returns the number of bytes
* needed in order to encode the backlen. */
unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {
if (l <= 127) {
@ -568,7 +568,7 @@ unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
}
}
/* Insert, delete or replace the specified element 'ele' of lenght 'len' at
/* Insert, delete or replace the specified element 'ele' of length 'len' at
* the specified position 'p', with 'p' being a listpack element pointer
* obtained with lpFirst(), lpLast(), lpIndex(), lpNext(), lpPrev() or
* lpSeek().
@ -710,7 +710,7 @@ unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, un
return lp;
}
/* Append the specified element 'ele' of lenght 'len' at the end of the
/* Append the specified element 'ele' of length 'len' at the end of the
* listpack. It is implemented in terms of lpInsert(), so the return value is
* the same as lpInsert(). */
unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size) {

123
src/localtime.c Normal file
View File

@ -0,0 +1,123 @@
/*
* Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <time.h>
/* This is a safe version of localtime() which contains no locks and is
* fork() friendly. Even the _r version of localtime() cannot be used safely
* in Redis. Another thread may be calling localtime() while the main thread
* forks(). Later when the child process calls localtime() again, for instance
* in order to log something to the Redis log, it may deadlock: in the copy
* of the address space of the forked process the lock will never be released.
*
* This function takes the timezone 'tz' as argument, and the 'dst' flag is
* used to check if daylight saving time is currently in effect. The caller
* of this function should obtain such information calling tzset() ASAP in the
* main() function to obtain the timezone offset from the 'timezone' global
* variable. To obtain the daylight information, if it is currently active or not,
* one trick is to call localtime() in main() ASAP as well, and get the
* information from the tm_isdst field of the tm structure. However the daylight
* time may switch in the future for long running processes, so this information
* should be refreshed at safe times.
*
* Note that this function does not work for dates < 1/1/1970, it is solely
* designed to work with what time(NULL) may return, and to support Redis
* logging of the dates, it's not really a complete implementation. */
static int is_leap_year(time_t year) {
if (year % 4) return 0; /* A year not divisible by 4 is not leap. */
else if (year % 100) return 1; /* If div by 4 and not 100 is surely leap. */
else if (year % 400) return 0; /* If div by 100 *and* 400 is not leap. */
else return 1; /* If div by 100 and not by 400 is leap. */
}
void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst) {
const time_t secs_min = 60;
const time_t secs_hour = 3600;
const time_t secs_day = 3600*24;
t -= tz; /* Adjust for timezone. */
t += 3600*dst; /* Adjust for daylight time. */
time_t days = t / secs_day; /* Days passed since epoch. */
time_t seconds = t % secs_day; /* Remaining seconds. */
tmp->tm_isdst = dst;
tmp->tm_hour = seconds / secs_hour;
tmp->tm_min = (seconds % secs_hour) / secs_min;
tmp->tm_sec = (seconds % secs_hour) % secs_min;
/* 1/1/1970 was a Thursday, that is, day 4 from the POV of the tm structure
* where sunday = 0, so to calculate the day of the week we have to add 4
* and take the modulo by 7. */
tmp->tm_wday = (days+4)%7;
/* Calculate the current year. */
tmp->tm_year = 1970;
while(1) {
/* Leap years have one day more. */
time_t days_this_year = 365 + is_leap_year(tmp->tm_year);
if (days_this_year > days) break;
days -= days_this_year;
tmp->tm_year++;
}
tmp->tm_yday = days; /* Number of day of the current year. */
/* We need to calculate in which month and day of the month we are. To do
* so we need to skip days according to how many days there are in each
* month, and adjust for the leap year that has one more day in February. */
int mdays[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
mdays[1] += is_leap_year(tmp->tm_year);
tmp->tm_mon = 0;
while(days >= mdays[tmp->tm_mon]) {
days -= mdays[tmp->tm_mon];
tmp->tm_mon++;
}
tmp->tm_mday = days+1; /* Add 1 since our 'days' is zero-based. */
tmp->tm_year -= 1900; /* Surprisingly tm_year is year-1900. */
}
#ifdef LOCALTIME_TEST_MAIN
#include <stdio.h>
int main(void) {
/* Obtain timezone and daylight info. */
tzset(); /* Now 'timezome' global is populated. */
time_t t = time(NULL);
struct tm *aux = localtime(&t);
int daylight_active = aux->tm_isdst;
struct tm tm;
char buf[1024];
nolocks_localtime(&tm,t,timezone,daylight_active);
strftime(buf,sizeof(buf),"%d %b %H:%M:%S",&tm);
printf("[timezone: %d, dl: %d] %s\n", (int)timezone, (int)daylight_active, buf);
}
#endif

View File

@ -2239,6 +2239,9 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) {
* to avoid a useless copy. */
if (flags & REDISMODULE_HASH_CFIELDS)
low_flags |= HASH_SET_TAKE_FIELD;
robj *argv[2] = {field,value};
hashTypeTryConversion(key->value,argv,0,1);
updated += hashTypeSet(key->value, field->ptr, value->ptr, low_flags);
/* If CFIELDS is active, SDS string ownership is now of hashTypeSet(),
@ -2709,9 +2712,9 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch
sds proto = sdsnewlen(c->buf,c->bufpos);
c->bufpos = 0;
while(listLength(c->reply)) {
sds o = listNodeValue(listFirst(c->reply));
clientReplyBlock *o = listNodeValue(listFirst(c->reply));
proto = sdscatsds(proto,o);
proto = sdscatlen(proto,o->buf,o->used);
listDelNode(c->reply,listFirst(c->reply));
}
reply = moduleCreateCallReplyFromProto(ctx,proto);
@ -3396,7 +3399,7 @@ void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_li
*
* If the specified log level is invalid, verbose is used by default.
* There is a fixed limit to the length of the log line this function is able
* to emit, this limti is not specified but is guaranteed to be more than
* to emit, this limit is not specified but is guaranteed to be more than
* a few lines of text.
*/
void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) {
@ -3827,7 +3830,7 @@ void moduleReleaseGIL(void) {
*
* Notification callback gets executed with a redis context that can not be
* used to send anything to the client, and has the db number where the event
* occured as its selected db number.
* occurred as its selected db number.
*
* Notice that it is not necessary to enable norifications in redis.conf for
* module notifications to work.
@ -3884,7 +3887,7 @@ void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid)
}
}
/* Unsubscribe any notification subscirbers this module has upon unloading */
/* Unsubscribe any notification subscribers this module has upon unloading */
void moduleUnsubscribeNotifications(RedisModule *module) {
listIter li;
listNode *ln;
@ -4362,7 +4365,7 @@ void moduleInitModulesSystem(void) {
* because the server must be fully initialized before loading modules.
*
* The function aborts the server on errors, since to start with missing
* modules is not considered sane: clients may rely on the existance of
* modules is not considered sane: clients may rely on the existence of
* given commands, loading AOF also may need some modules to exist, and
* if this instance is a slave, it must understand commands from master. */
void moduleLoadFromQueue(void) {
@ -4499,7 +4502,15 @@ int moduleUnload(sds name) {
* MODULE LOAD <path> [args...] */
void moduleCommand(client *c) {
char *subcmd = c->argv[1]->ptr;
if (c->argc == 2 && !strcasecmp(subcmd,"help")) {
const char *help[] = {
"LIST -- Return a list of loaded modules.",
"LOAD <path> [arg ...] -- Load a module library from <path>.",
"UNLOAD <name> -- Unload a module.",
NULL
};
addReplyHelp(c, help);
} else
if (!strcasecmp(subcmd,"load") && c->argc >= 3) {
robj **argv = NULL;
int argc = 0;
@ -4548,7 +4559,8 @@ void moduleCommand(client *c) {
}
dictReleaseIterator(di);
} else {
addReply(c,shared.syntaxerr);
addReplySubcommandSyntaxError(c);
return;
}
}

View File

@ -1,5 +1,5 @@
# gendoc.rb -- Converts the top-comments inside module.c to modules API
# reference documentaiton in markdown format.
# reference documentation in markdown format.
# Convert the C comment to markdown
def markdown(s)

View File

@ -56,11 +56,14 @@ size_t getStringObjectSdsUsedMemory(robj *o) {
/* Client.reply list dup and free methods. */
void *dupClientReplyValue(void *o) {
return sdsdup(o);
clientReplyBlock *old = o;
clientReplyBlock *buf = zmalloc(sizeof(clientReplyBlock) + old->size);
memcpy(buf, o, sizeof(clientReplyBlock) + old->size);
return buf;
}
void freeClientReplyValue(void *o) {
sdsfree(o);
zfree(o);
}
int listMatchObjects(void *a, void *b) {
@ -75,6 +78,8 @@ void linkClient(client *c) {
* this way removing the client in unlinkClient() will not require
* a linear scan, but just a constant time operation. */
c->client_list_node = listLast(server.clients);
uint64_t id = htonu64(c->id);
raxInsert(server.clients_index,(unsigned char*)&id,sizeof(id),c,NULL);
}
client *createClient(int fd) {
@ -138,6 +143,7 @@ client *createClient(int fd) {
c->bpop.target = NULL;
c->bpop.xread_group = NULL;
c->bpop.xread_consumer = NULL;
c->bpop.xread_group_noack = 0;
c->bpop.numreplicas = 0;
c->bpop.reploffset = 0;
c->woff = 0;
@ -237,25 +243,35 @@ int _addReplyToBuffer(client *c, const char *s, size_t len) {
void _addReplyStringToList(client *c, const char *s, size_t len) {
if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return;
if (listLength(c->reply) == 0) {
sds node = sdsnewlen(s,len);
listAddNodeTail(c->reply,node);
c->reply_bytes += len;
} else {
listNode *ln = listLast(c->reply);
sds tail = listNodeValue(ln);
listNode *ln = listLast(c->reply);
clientReplyBlock *tail = ln? listNodeValue(ln): NULL;
/* Append to this object when possible. If tail == NULL it was
* set via addDeferredMultiBulkLength(). */
if (tail && sdslen(tail)+len <= PROTO_REPLY_CHUNK_BYTES) {
tail = sdscatlen(tail,s,len);
listNodeValue(ln) = tail;
c->reply_bytes += len;
} else {
sds node = sdsnewlen(s,len);
listAddNodeTail(c->reply,node);
c->reply_bytes += len;
}
/* Note that 'tail' may be NULL even if we have a tail node, becuase when
* addDeferredMultiBulkLength() is used, it sets a dummy node to NULL just
* fo fill it later, when the size of the bulk length is set. */
/* Append to tail string when possible. */
if (tail) {
/* Copy the part we can fit into the tail, and leave the rest for a
* new node */
size_t avail = tail->size - tail->used;
size_t copy = avail >= len? len: avail;
memcpy(tail->buf + tail->used, s, copy);
tail->used += copy;
s += copy;
len -= copy;
}
if (len) {
/* Create a new node, make sure it is allocated to at
* least PROTO_REPLY_CHUNK_BYTES */
size_t size = len < PROTO_REPLY_CHUNK_BYTES? PROTO_REPLY_CHUNK_BYTES: len;
tail = zmalloc(size + sizeof(clientReplyBlock));
/* take over the allocation's internal fragmentation */
tail->size = zmalloc_usable(tail) - sizeof(clientReplyBlock);
tail->used = len;
memcpy(tail->buf, s, len);
listAddNodeTail(c->reply, tail);
c->reply_bytes += tail->size;
}
asyncCloseClientOnOutputBufferLimitReached(c);
}
@ -326,11 +342,30 @@ void addReplyErrorLength(client *c, const char *s, size_t len) {
if (!len || s[0] != '-') addReplyString(c,"-ERR ",5);
addReplyString(c,s,len);
addReplyString(c,"\r\n",2);
if (c->flags & CLIENT_MASTER) {
/* Sometimes it could be normal that a slave replies to a master with
* an error and this function gets called. Actually the error will never
* be sent because addReply*() against master clients has no effect...
* A notable example is:
*
* EVAL 'redis.call("incr",KEYS[1]); redis.call("nonexisting")' 1 x
*
* Where the master must propagate the first change even if the second
* will produce an error. However it is useful to log such events since
* they are rare and may hint at errors in a script or a bug in Redis. */
if (c->flags & (CLIENT_MASTER|CLIENT_SLAVE)) {
char* to = c->flags & CLIENT_MASTER? "master": "slave";
char* from = c->flags & CLIENT_MASTER? "slave": "master";
char *cmdname = c->lastcmd ? c->lastcmd->name : "<unknown>";
serverLog(LL_WARNING,"== CRITICAL == This slave is sending an error "
"to its master: '%s' after processing the command "
"'%s'", s, cmdname);
serverLog(LL_WARNING,"== CRITICAL == This %s is sending an error "
"to its %s: '%s' after processing the command "
"'%s'", from, to, s, cmdname);
/* Here we want to panic because when a master is sending an
* error to some slave in the context of replication, this can
* only create some kind of offset or data desynchronization. Better
* to catch it ASAP and crash instead of continuing. */
if (c->flags & CLIENT_SLAVE)
serverPanic("Continuing is unsafe: replication protocol violation.");
}
}
@ -387,26 +422,41 @@ void *addDeferredMultiBulkLength(client *c) {
/* Populate the length object and try gluing it to the next chunk. */
void setDeferredMultiBulkLength(client *c, void *node, long length) {
listNode *ln = (listNode*)node;
sds len, next;
clientReplyBlock *next;
char lenstr[128];
size_t lenstr_len = sprintf(lenstr, "*%ld\r\n", length);
/* Abort when *node is NULL: when the client should not accept writes
* we return NULL in addDeferredMultiBulkLength() */
if (node == NULL) return;
serverAssert(!listNodeValue(ln));
len = sdscatprintf(sdsnewlen("*",1),"%ld\r\n",length);
listNodeValue(ln) = len;
c->reply_bytes += sdslen(len);
if (ln->next != NULL) {
next = listNodeValue(ln->next);
/* Only glue when the next node is non-NULL (an sds in this case) */
if (next != NULL) {
len = sdscatsds(len,next);
listDelNode(c->reply,ln->next);
listNodeValue(ln) = len;
/* No need to update c->reply_bytes: we are just moving the same
* amount of bytes from one node to another. */
}
/* Normally we fill this dummy NULL node, added by addDeferredMultiBulkLength(),
* with a new buffer structure containing the protocol needed to specify
* the length of the array following. However sometimes when there is
* little memory to move, we may instead remove this NULL node, and prefix
* our protocol in the node immediately after to it, in order to save a
* write(2) syscall later. Conditions needed to do it:
*
* - The next node is non-NULL,
* - It has enough room already allocated
* - And not too large (avoid large memmove) */
if (ln->next != NULL && (next = listNodeValue(ln->next)) &&
next->size - next->used >= lenstr_len &&
next->used < PROTO_REPLY_CHUNK_BYTES * 4) {
memmove(next->buf + lenstr_len, next->buf, next->used);
memcpy(next->buf, lenstr, lenstr_len);
next->used += lenstr_len;
listDelNode(c->reply,ln);
} else {
/* Create a new node */
clientReplyBlock *buf = zmalloc(lenstr_len + sizeof(clientReplyBlock));
/* Take over the allocation's internal fragmentation */
buf->size = zmalloc_usable(buf) - sizeof(clientReplyBlock);
buf->used = lenstr_len;
memcpy(buf->buf, lenstr, lenstr_len);
listNodeValue(ln) = buf;
c->reply_bytes += buf->size;
}
asyncCloseClientOnOutputBufferLimitReached(c);
}
@ -560,11 +610,24 @@ void addReplyHelp(client *c, const char **help) {
setDeferredMultiBulkLength(c,blenp,blen);
}
/* Add a suggestive error reply.
* This function is typically invoked by from commands that support
* subcommands in response to an unknown subcommand or argument error. */
void addReplySubcommandSyntaxError(client *c) {
sds cmd = sdsnew((char*) c->argv[0]->ptr);
sdstoupper(cmd);
addReplyErrorFormat(c,
"Unknown subcommand or wrong number of arguments for '%s'. Try %s HELP.",
(char*)c->argv[1]->ptr,cmd);
sdsfree(cmd);
}
/* Copy 'src' client output buffers into 'dst' client output buffers.
* The function takes care of freeing the old output buffers of the
* destination client. */
void copyClientOutputBuffer(client *dst, client *src) {
listRelease(dst->reply);
dst->sentlen = 0;
dst->reply = listDup(src->reply);
memcpy(dst->buf,src->buf,src->bufpos);
dst->bufpos = src->bufpos;
@ -720,6 +783,8 @@ void unlinkClient(client *c) {
if (c->fd != -1) {
/* Remove from the list of active clients. */
if (c->client_list_node) {
uint64_t id = htonu64(c->id);
raxRemove(server.clients_index,(unsigned char*)&id,sizeof(id),NULL);
listDelNode(server.clients,c->client_list_node);
c->client_list_node = NULL;
}
@ -864,12 +929,21 @@ void freeClientsInAsyncFreeQueue(void) {
}
}
/* Return a client by ID, or NULL if the client ID is not in the set
* of registered clients. Note that "fake clients", created with -1 as FD,
* are not registered clients. */
client *lookupClientByID(uint64_t id) {
id = htonu64(id);
client *c = raxFind(server.clients_index,(unsigned char*)&id,sizeof(id));
return (c == raxNotFound) ? NULL : c;
}
/* Write data in output buffers to client. Return C_OK if the client
* is still valid after the call, C_ERR if it was freed. */
int writeToClient(int fd, client *c, int handler_installed) {
ssize_t nwritten = 0, totwritten = 0;
size_t objlen;
sds o;
clientReplyBlock *o;
while(clientHasPendingReplies(c)) {
if (c->bufpos > 0) {
@ -886,23 +960,24 @@ int writeToClient(int fd, client *c, int handler_installed) {
}
} else {
o = listNodeValue(listFirst(c->reply));
objlen = sdslen(o);
objlen = o->used;
if (objlen == 0) {
c->reply_bytes -= o->size;
listDelNode(c->reply,listFirst(c->reply));
continue;
}
nwritten = write(fd, o + c->sentlen, objlen - c->sentlen);
nwritten = write(fd, o->buf + c->sentlen, objlen - c->sentlen);
if (nwritten <= 0) break;
c->sentlen += nwritten;
totwritten += nwritten;
/* If we fully sent the object on head go to the next one */
if (c->sentlen == objlen) {
c->reply_bytes -= o->size;
listDelNode(c->reply,listFirst(c->reply));
c->sentlen = 0;
c->reply_bytes -= objlen;
/* If there are no longer objects in the list, we expect
* the count of reply bytes to be exactly zero. */
if (listLength(c->reply) == 0)
@ -1039,7 +1114,7 @@ void resetClient(client *c) {
* with the error and close the connection. */
int processInlineBuffer(client *c) {
char *newline;
int argc, j;
int argc, j, linefeed_chars = 1;
sds *argv, aux;
size_t querylen;
@ -1057,7 +1132,7 @@ int processInlineBuffer(client *c) {
/* Handle the \r\n case. */
if (newline && newline != c->querybuf && *(newline-1) == '\r')
newline--;
newline--, linefeed_chars++;
/* Split the input buffer up to the \r\n */
querylen = newline-(c->querybuf);
@ -1077,7 +1152,7 @@ int processInlineBuffer(client *c) {
c->repl_ack_time = server.unixtime;
/* Leave data after the first line of the query in the buffer */
sdsrange(c->querybuf,querylen+2,-1);
sdsrange(c->querybuf,querylen+linefeed_chars,-1);
/* Setup argv array on client structure */
if (argc) {
@ -1493,6 +1568,7 @@ sds catClientInfoString(sds s, client *client) {
*p++ = 'S';
}
if (client->flags & CLIENT_MASTER) *p++ = 'M';
if (client->flags & CLIENT_PUBSUB) *p++ = 'P';
if (client->flags & CLIENT_MULTI) *p++ = 'x';
if (client->flags & CLIENT_BLOCKED) *p++ = 'b';
if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd';
@ -1531,7 +1607,7 @@ sds catClientInfoString(sds s, client *client) {
client->lastcmd ? client->lastcmd->name : "NULL");
}
sds getAllClientsInfoString(void) {
sds getAllClientsInfoString(int type) {
listNode *ln;
listIter li;
client *client;
@ -1540,6 +1616,7 @@ sds getAllClientsInfoString(void) {
listRewind(server.clients,&li);
while ((ln = listNext(&li)) != NULL) {
client = listNodeValue(ln);
if (type != -1 && getClientType(client) != type) continue;
o = catClientInfoString(o,client);
o = sdscatlen(o,"\n",1);
}
@ -1553,22 +1630,40 @@ void clientCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"getname -- Return the name of the current connection.",
"kill <ip:port> -- Kill connection made from <ip:port>.",
"id -- Return the ID of the current connection.",
"getname -- Return the name of the current connection.",
"kill <ip:port> -- Kill connection made from <ip:port>.",
"kill <option> <value> [option value ...] -- Kill connections. Options are:",
" addr <ip:port> -- Kill connection made from <ip:port>.",
" type (normal|master|slave|pubsub) -- Kill connections by type.",
" skipme (yes|no) -- Skip killing current connection (default: yes).",
"list -- Return information about client connections.",
"pause <timeout> -- Suspend all Redis clients for <timout> milliseconds.",
"reply (on|off|skip) -- Control the replies sent to the current connection.",
"setname <name> -- Assign the name <name> to the current connection.",
" addr <ip:port> -- Kill connection made from <ip:port>",
" type (normal|master|slave|pubsub) -- Kill connections by type.",
" skipme (yes|no) -- Skip killing current connection (default: yes).",
"list [options ...] -- Return information about client connections. Options:",
" type (normal|master|slave|pubsub) -- Return clients of specified type.",
"pause <timeout> -- Suspend all Redis clients for <timout> milliseconds.",
"reply (on|off|skip) -- Control the replies sent to the current connection.",
"setname <name> -- Assign the name <name> to the current connection.",
"unblock <clientid> [TIMEOUT|ERROR] -- Unblock the specified blocked client.",
NULL
};
addReplyHelp(c, help);
} else if (!strcasecmp(c->argv[1]->ptr,"list") && c->argc == 2) {
} else if (!strcasecmp(c->argv[1]->ptr,"id") && c->argc == 2) {
/* CLIENT ID */
addReplyLongLong(c,c->id);
} else if (!strcasecmp(c->argv[1]->ptr,"list")) {
/* CLIENT LIST */
sds o = getAllClientsInfoString();
int type = -1;
if (c->argc == 4 && !strcasecmp(c->argv[2]->ptr,"type")) {
type = getClientTypeByName(c->argv[3]->ptr);
if (type == -1) {
addReplyErrorFormat(c,"Unknown client type '%s'",
(char*) c->argv[3]->ptr);
return;
}
} else if (c->argc != 2) {
addReply(c,shared.syntaxerr);
return;
}
sds o = getAllClientsInfoString(type);
addReplyBulkCBuffer(c,o,sdslen(o));
sdsfree(o);
} else if (!strcasecmp(c->argv[1]->ptr,"reply") && c->argc == 3) {
@ -1671,6 +1766,38 @@ NULL
/* If this client has to be closed, flag it as CLOSE_AFTER_REPLY
* only after we queued the reply to its output buffers. */
if (close_this_client) c->flags |= CLIENT_CLOSE_AFTER_REPLY;
} else if (!strcasecmp(c->argv[1]->ptr,"unblock") && (c->argc == 3 ||
c->argc == 4))
{
/* CLIENT UNBLOCK <id> [timeout|error] */
long long id;
int unblock_error = 0;
if (c->argc == 4) {
if (!strcasecmp(c->argv[3]->ptr,"timeout")) {
unblock_error = 0;
} else if (!strcasecmp(c->argv[3]->ptr,"error")) {
unblock_error = 1;
} else {
addReplyError(c,
"CLIENT UNBLOCK reason should be TIMEOUT or ERROR");
return;
}
}
if (getLongLongFromObjectOrReply(c,c->argv[2],&id,NULL)
!= C_OK) return;
struct client *target = lookupClientByID(id);
if (target && target->flags & CLIENT_BLOCKED) {
if (unblock_error)
addReplyError(target,
"-UNBLOCKED client unblocked via CLIENT UNBLOCK");
else
replyToBlockedClientTimedOut(target);
unblockClient(target);
addReply(c,shared.cone);
} else {
addReply(c,shared.czero);
}
} else if (!strcasecmp(c->argv[1]->ptr,"setname") && c->argc == 3) {
int j, len = sdslen(c->argv[2]->ptr);
char *p = c->argv[2]->ptr;
@ -1821,10 +1948,7 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) {
* the caller wishes. The main usage of this function currently is
* enforcing the client output length limits. */
unsigned long getClientOutputBufferMemoryUsage(client *c) {
unsigned long list_item_size = sizeof(listNode)+5;
/* The +5 above means we assume an sds16 hdr, may not be true
* but is not going to be a problem. */
unsigned long list_item_size = sizeof(listNode) + sizeof(clientReplyBlock);
return c->reply_bytes + (list_item_size*listLength(c->reply));
}

View File

@ -29,8 +29,8 @@
#include "server.h"
/* This file implements keyspace events notification via Pub/Sub ad
* described at http://redis.io/topics/keyspace-events. */
/* This file implements keyspace events notification via Pub/Sub and
* described at https://redis.io/topics/notifications. */
/* Turn a string representing notification classes into an integer
* representing notification classes flags xored.

View File

@ -123,9 +123,25 @@ robj *createStringObject(const char *ptr, size_t len) {
return createRawStringObject(ptr,len);
}
robj *createStringObjectFromLongLong(long long value) {
/* Create a string object from a long long value. When possible returns a
* shared integer object, or at least an integer encoded one.
*
* If valueobj is non zero, the function avoids returning a a shared
* integer, because the object is going to be used as value in the Redis key
* space (for instance when the INCR command is used), so we want LFU/LRU
* values specific for each key. */
robj *createStringObjectFromLongLongWithOptions(long long value, int valueobj) {
robj *o;
if (value >= 0 && value < OBJ_SHARED_INTEGERS) {
if (server.maxmemory == 0 ||
!(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS))
{
/* If the maxmemory policy permits, we can still return shared integers
* even if valueobj is true. */
valueobj = 0;
}
if (value >= 0 && value < OBJ_SHARED_INTEGERS && valueobj == 0) {
incrRefCount(shared.integers[value]);
o = shared.integers[value];
} else {
@ -140,6 +156,20 @@ robj *createStringObjectFromLongLong(long long value) {
return o;
}
/* Wrapper for createStringObjectFromLongLongWithOptions() always demanding
* to create a shared object if possible. */
robj *createStringObjectFromLongLong(long long value) {
return createStringObjectFromLongLongWithOptions(value,0);
}
/* Wrapper for createStringObjectFromLongLongWithOptions() avoiding a shared
* object when LFU/LRU info are needed, that is, when the object is used
* as a value in the key space, and Redis is configured to evict based on
* LFU/LRU. */
robj *createStringObjectFromLongLongForValue(long long value) {
return createStringObjectFromLongLongWithOptions(value,1);
}
/* Create a string object from a long double. If humanfriendly is non-zero
* it does not use exponential format and trims trailing zeroes at the end,
* however this results in loss of precision. Otherwise exp format is used
@ -715,7 +745,7 @@ char *strEncoding(int encoding) {
* size of a radix tree that is used to store Stream IDs.
*
* Note: to guess the size of the radix tree is not trivial, so we
* approximate it considering 128 bytes of data overhead for each
* approximate it considering 16 bytes of data overhead for each
* key (the ID), and then adding the number of bare nodes, plus some
* overhead due by the data and child pointers. This secret recipe
* was obtained by checking the average radix tree created by real
@ -874,6 +904,7 @@ size_t objectComputeSize(robj *o, size_t sample_size) {
* structures and the PEL memory usage. */
raxIterator cri;
raxStart(&cri,cg->consumers);
raxSeek(&cri,"^",NULL,0);
while(raxNext(&cri)) {
streamConsumer *consumer = cri.data;
asize += sizeof(*consumer);
@ -1136,6 +1167,32 @@ sds getMemoryDoctorReport(void) {
return s;
}
/* Set the object LRU/LFU depending on server.maxmemory_policy.
* The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
* The lru_idle and lru_clock args are only relevant if policy
* is MAXMEMORY_FLAG_LRU.
* Either or both of them may be <0, in that case, nothing is set. */
void objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
long long lru_clock) {
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
if (lfu_freq >= 0) {
serverAssert(lfu_freq <= 255);
val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
}
} else if (lru_idle >= 0) {
/* Serialized LRU idle time is in seconds. Scale
* according to the LRU clock resolution this Redis
* instance was compiled with (normally 1000 ms, so the
* below statement will expand to lru_idle*1000/1000. */
lru_idle = lru_idle*1000/LRU_CLOCK_RESOLUTION;
val->lru = lru_clock - lru_idle;
/* If the lru field overflows (since LRU it is a wrapping
* clock), the best we can do is to provide the maximum
* representable idle time. */
if (val->lru < 0) val->lru = lru_clock+1;
}
}
/* ======================= The OBJECT and MEMORY commands =================== */
/* This is a helper function for the OBJECT command. We need to lookup keys
@ -1197,7 +1254,7 @@ NULL
* when the key is read or overwritten. */
addReplyLongLong(c,LFUDecrAndReturn(o));
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try OBJECT help", (char *)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}

View File

@ -327,9 +327,9 @@ void publishCommand(client *c) {
void pubsubCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"channels [<pattern>] -- Return the currently active channels matching a pattern (default: all).",
"numpat -- Return number of subscriptions to patterns.",
"numsub [channel-1 .. channel-N] -- Returns the number of subscribers for the specified channels (excluding patterns, default: none).",
"CHANNELS [<pattern>] -- Return the currently active channels matching a pattern (default: all).",
"NUMPAT -- Return number of subscriptions to patterns.",
"NUMSUB [channel-1 .. channel-N] -- Returns the number of subscribers for the specified channels (excluding patterns, default: none).",
NULL
};
addReplyHelp(c, help);
@ -372,7 +372,6 @@ NULL
/* PUBSUB NUMPAT */
addReplyLongLong(c,listLength(server.pubsub_patterns));
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try PUBSUB HELP",
(char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}

View File

@ -1636,7 +1636,7 @@ int quicklistTest(int argc, char *argv[]) {
TEST("add to tail of empty list") {
quicklist *ql = quicklistNew(-2, options[_i]);
quicklistPushTail(ql, "hello", 6);
/* 1 for head and 1 for tail beacuse 1 node = head = tail */
/* 1 for head and 1 for tail because 1 node = head = tail */
ql_verify(ql, 1, 1, 1, 1);
quicklistRelease(ql);
}
@ -1644,7 +1644,7 @@ int quicklistTest(int argc, char *argv[]) {
TEST("add to head of empty list") {
quicklist *ql = quicklistNew(-2, options[_i]);
quicklistPushHead(ql, "hello", 6);
/* 1 for head and 1 for tail beacuse 1 node = head = tail */
/* 1 for head and 1 for tail because 1 node = head = tail */
ql_verify(ql, 1, 1, 1, 1);
quicklistRelease(ql);
}

View File

@ -467,7 +467,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
/* If the node we stopped at is a compressed node, we need to
* split it before to continue.
*
* Splitting a compressed node have a few possibile cases.
* Splitting a compressed node have a few possible cases.
* Imagine that the node 'h' we are currently at is a compressed
* node contaning the string "ANNIBALE" (it means that it represents
* nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
@ -749,7 +749,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
cp = raxNodeLastChildPtr(trimmed);
memcpy(cp,&postfix,sizeof(postfix));
/* Finish! We don't need to contine with the insertion
/* Finish! We don't need to continue with the insertion
* algorithm for ALGO 2. The key is already inserted. */
rax->numele++;
rax_free(h);
@ -1167,6 +1167,7 @@ void raxStart(raxIterator *it, rax *rt) {
it->key = it->key_static_string;
it->key_max = RAX_ITER_STATIC_LEN;
it->data = NULL;
it->node_cb = NULL;
raxStackInit(&it->stack);
}
@ -1240,6 +1241,10 @@ int raxIteratorNextStep(raxIterator *it, int noup) {
if (!raxIteratorAddChars(it,it->node->data,
it->node->iscompr ? it->node->size : 1)) return 0;
memcpy(&it->node,cp,sizeof(it->node));
/* Call the node callback if any, and replace the node pointer
* if the callback returns true. */
if (it->node_cb && it->node_cb(&it->node))
memcpy(cp,&it->node,sizeof(it->node));
/* For "next" step, stop every time we find a key along the
* way, since the key is lexicograhically smaller compared to
* what follows in the sub-children. */
@ -1292,6 +1297,10 @@ int raxIteratorNextStep(raxIterator *it, int noup) {
raxIteratorAddChars(it,it->node->data+i,1);
if (!raxStackPush(&it->stack,it->node)) return 0;
memcpy(&it->node,cp,sizeof(it->node));
/* Call the node callback if any, and replace the node
* pointer if the callback returns true. */
if (it->node_cb && it->node_cb(&it->node))
memcpy(cp,&it->node,sizeof(it->node));
if (it->node->iskey) {
it->data = raxGetData(it->node);
return 1;
@ -1325,7 +1334,7 @@ int raxSeekGreatest(raxIterator *it) {
/* Like raxIteratorNextStep() but implements an iteration step moving
* to the lexicographically previous element. The 'noup' option has a similar
* effect to the one of raxIteratorPrevSte(). */
* effect to the one of raxIteratorNextStep(). */
int raxIteratorPrevStep(raxIterator *it, int noup) {
if (it->flags & RAX_ITER_EOF) {
return 1;

View File

@ -94,7 +94,7 @@ typedef struct raxNode {
*
* If the node has an associated key (iskey=1) and is not NULL
* (isnull=0), then after the raxNode pointers poiting to the
* childen, an additional value pointer is present (as you can see
* children, an additional value pointer is present (as you can see
* in the representation above as "value-ptr" field).
*/
unsigned char data[];
@ -119,6 +119,21 @@ typedef struct raxStack {
int oom; /* True if pushing into this stack failed for OOM at some point. */
} raxStack;
/* Optional callback used for iterators and be notified on each rax node,
* including nodes not representing keys. If the callback returns true
* the callback changed the node pointer in the iterator structure, and the
* iterator implementation will have to replace the pointer in the radix tree
* internals. This allows the callback to reallocate the node to perform
* very special operations, normally not needed by normal applications.
*
* This callback is used to perform very low level analysis of the radix tree
* structure, scanning each possible node (but the root node), or in order to
* reallocate the nodes to reduce the allocation fragmentation (this is the
* Redis application for this callback).
*
* This is currently only supported in forward iterations (raxNext) */
typedef int (*raxNodeCallback)(raxNode **noderef);
/* Radix tree iterator state is encapsulated into this data structure. */
#define RAX_ITER_STATIC_LEN 128
#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
@ -137,6 +152,7 @@ typedef struct raxIterator {
unsigned char key_static_string[RAX_ITER_STATIC_LEN];
raxNode *node; /* Current node. Only for unsafe iteration. */
raxStack stack; /* Stack used for unsafe iteration. */
raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */
} raxIterator;
/* A special pointer returned for not found items. */
@ -161,4 +177,8 @@ int raxEOF(raxIterator *it);
void raxShow(rax *rax);
uint64_t raxSize(rax *rax);
/* Internal API. May be used by the node callback in order to access rax nodes
* in a low level way, so this function is exported as well. */
void raxSetData(raxNode *n, void *data);
#endif

View File

@ -288,7 +288,7 @@ void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) {
memcpy(p,buf,len);
return p;
} else if (encode) {
return createStringObjectFromLongLong(val);
return createStringObjectFromLongLongForValue(val);
} else {
return createObject(OBJ_STRING,sdsfromlonglong(val));
}
@ -1127,13 +1127,9 @@ int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi) {
* is currently the largest type we are able to represent in RDB sizes.
* However this does not limit the actual size of the DB to load since
* these sizes are just hints to resize the hash tables. */
uint32_t db_size, expires_size;
db_size = (dictSize(db->dict) <= UINT32_MAX) ?
dictSize(db->dict) :
UINT32_MAX;
expires_size = (dictSize(db->expires) <= UINT32_MAX) ?
dictSize(db->expires) :
UINT32_MAX;
uint64_t db_size, expires_size;
db_size = dictSize(db->dict);
expires_size = dictSize(db->expires);
if (rdbSaveType(rdb,RDB_OPCODE_RESIZEDB) == -1) goto werr;
if (rdbSaveLen(rdb,db_size) == -1) goto werr;
if (rdbSaveLen(rdb,expires_size) == -1) goto werr;
@ -1241,6 +1237,10 @@ int rdbSave(char *filename, rdbSaveInfo *rsi) {
}
rioInitWithFile(&rdb,fp);
if (server.rdb_save_incremental_fsync)
rioSetAutoSync(&rdb,REDIS_AUTOSYNC_BYTES);
if (rdbSaveRio(&rdb,&error,RDB_SAVE_NONE,rsi) == C_ERR) {
errno = error;
goto werr;
@ -1658,7 +1658,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
if (first == NULL) {
/* Serialized listpacks should never be empty, since on
* deletion we should remove the radix tree key if the
* resulting listpack is emtpy. */
* resulting listpack is empty. */
rdbExitReportCorruptRDB("Empty listpack inside stream");
}
@ -1867,11 +1867,9 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) {
}
/* Key-specific attributes, set by opcodes before the key type. */
long long expiretime = -1, now = mstime();
long long lru_idle = -1, lfu_freq = -1, expiretime = -1, now = mstime();
long long lru_clock = LRU_CLOCK();
uint64_t lru_idle = -1;
int lfu_freq = -1;
while(1) {
robj *key, *val;
@ -1899,7 +1897,9 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) {
continue; /* Read next opcode. */
} else if (type == RDB_OPCODE_IDLE) {
/* IDLE: LRU idle time. */
if ((lru_idle = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
uint64_t qword;
if ((qword = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
lru_idle = qword;
continue; /* Read next opcode. */
} else if (type == RDB_OPCODE_EOF) {
/* EOF: End of file, exit the main loop. */
@ -2018,20 +2018,9 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) {
/* Set the expire time if needed */
if (expiretime != -1) setExpire(NULL,db,key,expiretime);
if (lfu_freq != -1) {
val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
} else {
/* LRU idle time loaded from RDB is in seconds. Scale
* according to the LRU clock resolution this Redis
* instance was compiled with (normaly 1000 ms, so the
* below statement will expand to lru_idle*1000/1000. */
lru_idle = lru_idle*1000/LRU_CLOCK_RESOLUTION;
val->lru = lru_clock - lru_idle;
/* If the lru field overflows (since LRU it is a wrapping
* clock), the best we can do is to provide the maxium
* representable idle time. */
if (val->lru < 0) val->lru = lru_clock+1;
}
/* Set usage information (for eviction). */
objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock);
/* Decrement the key refcount since dbAdd() will take its
* own reference. */
@ -2110,7 +2099,7 @@ void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
* tirggering an error conditon. */
* tirggering an error condition. */
if (bysignal != SIGUSR1)
server.lastbgsave_status = C_ERR;
}
@ -2147,7 +2136,7 @@ void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
* in error state.
*
* If the process returned an error, consider the list of slaves that
* can continue to be emtpy, so that it's just a special case of the
* can continue to be empty, so that it's just a special case of the
* normal code path. */
ok_slaves = zmalloc(sizeof(uint64_t)); /* Make space for the count. */
ok_slaves[0] = 0;

View File

@ -129,6 +129,8 @@ int rdbLoadType(rio *rdb);
int rdbSaveTime(rio *rdb, time_t t);
time_t rdbLoadTime(rio *rdb);
int rdbSaveLen(rio *rdb, uint64_t len);
int rdbSaveMillisecondTime(rio *rdb, long long t);
long long rdbLoadMillisecondTime(rio *rdb, int rdbver);
uint64_t rdbLoadLen(rio *rdb, int *isencoded);
int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr);
int rdbSaveObjectType(rio *rdb, robj *o);

View File

@ -34,7 +34,6 @@
void createSharedObjects(void);
void rdbLoadProgressCallback(rio *r, const void *buf, size_t len);
long long rdbLoadMillisecondTime(rio *rdb);
int rdbCheckMode = 0;
struct {
@ -224,7 +223,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) {
/* EXPIRETIME_MS: milliseconds precision expire times introduced
* with RDB v3. Like EXPIRETIME but no with more precision. */
rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE;
if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr;
if ((expiretime = rdbLoadMillisecondTime(&rdb, rdbver)) == -1) goto eoferr;
continue; /* Read next opcode. */
} else if (type == RDB_OPCODE_FREQ) {
/* FREQ: LFU frequency. */
@ -287,12 +286,8 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) {
/* Read value */
rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE;
if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr;
/* Check if the key already expired. This function is used when loading
* an RDB file from disk, either at startup, or when an RDB was
* received from the master. In the latter case, the master is
* responsible for key expiry. If we would expire keys here, the
* snapshot taken by the master may not be reflected on the slave. */
if (server.masterhost == NULL && expiretime != -1 && expiretime < now)
/* Check if the key already expired. */
if (expiretime != -1 && expiretime < now)
rdbstate.already_expired++;
if (expiretime != -1) rdbstate.expires++;
rdbstate.key = NULL;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* redisassert.h -- Drop in replacemnet assert.h that prints the stack trace
/* redisassert.h -- Drop in replacements assert.h that prints the stack trace
* in the Redis logs.
*
* This file should be included instead of "assert.h" inside libraries used by

View File

@ -553,7 +553,7 @@ need_full_resync:
* Side effects, other than starting a BGSAVE:
*
* 1) Handle the slaves in WAIT_START state, by preparing them for a full
* sync if the BGSAVE was succesfully started, or sending them an error
* sync if the BGSAVE was successfully started, or sending them an error
* and dropping them from the list of slaves.
*
* 2) Flush the Lua scripting script cache if the BGSAVE was actually
@ -896,7 +896,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
}
}
/* If the preamble was already transfered, send the RDB bulk data. */
/* If the preamble was already transferred, send the RDB bulk data. */
lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
buflen = read(slave->repldbfd,buf,PROTO_IOBUF_LEN);
if (buflen <= 0) {
@ -965,7 +965,7 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
replicationGetSlaveName(slave));
/* Note: we wait for a REPLCONF ACK message from slave in
* order to really put it online (install the write handler
* so that the accumulated data can be transfered). However
* so that the accumulated data can be transferred). However
* we change the replication state ASAP, since our slave
* is technically online now. */
slave->replstate = SLAVE_STATE_ONLINE;
@ -1048,7 +1048,7 @@ int slaveIsInHandshakeState(void) {
/* Avoid the master to detect the slave is timing out while loading the
* RDB file in initial synchronization. We send a single newline character
* that is valid protocol but is guaranteed to either be sent entierly or
* that is valid protocol but is guaranteed to either be sent entirely or
* not, since the byte is indivisible.
*
* The function is called in two contexts: while we flush the current
@ -1279,6 +1279,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
close(server.repl_transfer_fd);
replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db);
server.repl_state = REPL_STATE_CONNECTED;
server.repl_down_since = 0;
/* After a full resynchroniziation we use the replication ID and
* offset of the master. The secondary ID / offset are cleared since
* we are starting a new history. */
@ -1315,24 +1316,31 @@ error:
#define SYNC_CMD_FULL (SYNC_CMD_READ|SYNC_CMD_WRITE)
char *sendSynchronousCommand(int flags, int fd, ...) {
/* Create the command to send to the master, we use simple inline
* protocol for simplicity as currently we only send simple strings. */
/* Create the command to send to the master, we use redis binary
* protocol to make sure correct arguments are sent. This function
* is not safe for all binary data. */
if (flags & SYNC_CMD_WRITE) {
char *arg;
va_list ap;
sds cmd = sdsempty();
sds cmdargs = sdsempty();
size_t argslen = 0;
va_start(ap,fd);
while(1) {
arg = va_arg(ap, char*);
if (arg == NULL) break;
if (sdslen(cmd) != 0) cmd = sdscatlen(cmd," ",1);
cmd = sdscat(cmd,arg);
cmdargs = sdscatprintf(cmdargs,"$%zu\r\n%s\r\n",strlen(arg),arg);
argslen++;
}
cmd = sdscatlen(cmd,"\r\n",2);
va_end(ap);
cmd = sdscatprintf(cmd,"*%zu\r\n",argslen);
cmd = sdscatsds(cmd,cmdargs);
sdsfree(cmdargs);
/* Transfer command to the server. */
if (syncWrite(fd,cmd,sdslen(cmd),server.repl_syncio_timeout*1000)
== -1)
@ -1389,7 +1397,7 @@ char *sendSynchronousCommand(int flags, int fd, ...) {
*
* The function returns:
*
* PSYNC_CONTINUE: If the PSYNC command succeded and we can continue.
* PSYNC_CONTINUE: If the PSYNC command succeeded and we can continue.
* PSYNC_FULLRESYNC: If PSYNC is supported but a full resync is needed.
* In this case the master run_id and global replication
* offset is saved.
@ -1943,7 +1951,6 @@ void replicationSetMaster(char *ip, int port) {
* our own parameters, to later PSYNC with the new master. */
if (was_master) replicationCacheMasterUsingMyself();
server.repl_state = REPL_STATE_CONNECT;
server.repl_down_since = 0;
}
/* Cancel replication, setting the instance as a master itself. */
@ -2113,7 +2120,7 @@ void replicationSendAck(void) {
* functions. */
/* This function is called by freeClient() in order to cache the master
* client structure instead of destryoing it. freeClient() will return
* client structure instead of destroying it. freeClient() will return
* ASAP after this function returns, so every action needed to avoid problems
* with a client that is really "suspended" has to be done by this function.
*
@ -2141,6 +2148,8 @@ void replicationCacheMaster(client *c) {
server.master->read_reploff = server.master->reploff;
if (c->flags & CLIENT_MULTI) discardTransaction(c);
listEmpty(c->reply);
c->sentlen = 0;
c->reply_bytes = 0;
c->bufpos = 0;
resetClient(c);
@ -2210,6 +2219,7 @@ void replicationResurrectCachedMaster(int newfd) {
server.master->authenticated = 1;
server.master->lastinteraction = server.unixtime;
server.repl_state = REPL_STATE_CONNECTED;
server.repl_down_since = 0;
/* Re-add to the list of clients. */
linkClient(server.master);

View File

@ -116,7 +116,7 @@ static size_t rioFileWrite(rio *r, const void *buf, size_t len) {
r->io.file.buffered >= r->io.file.autosync)
{
fflush(r->io.file.fp);
aof_fsync(fileno(r->io.file.fp));
redis_fsync(fileno(r->io.file.fp));
r->io.file.buffered = 0;
}
return retval;

View File

@ -575,9 +575,9 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
reply = sdsnewlen(c->buf,c->bufpos);
c->bufpos = 0;
while(listLength(c->reply)) {
sds o = listNodeValue(listFirst(c->reply));
clientReplyBlock *o = listNodeValue(listFirst(c->reply));
reply = sdscatsds(reply,o);
reply = sdscatlen(reply,o->buf,o->used);
listDelNode(c->reply,listFirst(c->reply));
}
}
@ -1457,11 +1457,11 @@ void evalShaCommand(client *c) {
void scriptCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"debug (yes|sync|no) -- Set the debug mode for subsequent scripts executed.",
"exists <sha1> [<sha1> ...] -- Return information about the existence of the scripts in the script cache.",
"flush -- Flush the Lua scripts cache. Very dangerous on slaves.",
"kill -- Kill the currently executing Lua script.",
"load <script> -- Load a script into the scripts cache, without executing it.",
"DEBUG (yes|sync|no) -- Set the debug mode for subsequent scripts executed.",
"EXISTS <sha1> [<sha1> ...] -- Return information about the existence of the scripts in the script cache.",
"FLUSH -- Flush the Lua scripts cache. Very dangerous on slaves.",
"KILL -- Kill the currently executing Lua script.",
"LOAD <script> -- Load a script into the scripts cache, without executing it.",
NULL
};
addReplyHelp(c, help);
@ -1514,7 +1514,7 @@ NULL
return;
}
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try SCRIPT HELP", (char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}

View File

@ -67,8 +67,10 @@ static inline char sdsReqType(size_t string_size) {
#if (LONG_MAX == LLONG_MAX)
if (string_size < 1ll<<32)
return SDS_TYPE_32;
#endif
return SDS_TYPE_64;
#else
return SDS_TYPE_32;
#endif
}
/* Create a new sds string with the content specified by the 'init' pointer
@ -283,7 +285,7 @@ sds sdsRemoveFreeSpace(sds s) {
return s;
}
/* Return the total size of the allocation of the specifed sds string,
/* Return the total size of the allocation of the specified sds string,
* including:
* 1) The sds header before the pointer.
* 2) The string.

View File

@ -84,6 +84,7 @@ typedef struct sentinelAddr {
#define SENTINEL_MAX_PENDING_COMMANDS 100
#define SENTINEL_ELECTION_TIMEOUT 10000
#define SENTINEL_MAX_DESYNC 1000
#define SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG 1
/* Failover machine different states. */
#define SENTINEL_FAILOVER_STATE_NONE 0 /* No failover in progress. */
@ -177,6 +178,10 @@ typedef struct sentinelRedisInstance {
mstime_t o_down_since_time; /* Objectively down since time. */
mstime_t down_after_period; /* Consider it down after that period. */
mstime_t info_refresh; /* Time at which we received INFO output from it. */
dict *renamed_commands; /* Commands renamed in this instance:
Sentinel will use the alternative commands
mapped on this table to send things like
SLAVEOF, CONFING, INFO, ... */
/* Role and the first time we observed it.
* This is useful in order to delay replacing what the instance reports
@ -241,6 +246,8 @@ struct sentinelState {
int announce_port; /* Port that is gossiped to other sentinels if
non zero. */
unsigned long simfailure_flags; /* Failures simulation. */
int deny_scripts_reconfig; /* Allow SENTINEL SET ... to change script
paths at runtime? */
} sentinel;
/* A script execution job. */
@ -380,7 +387,9 @@ void sentinelSimFailureCrash(void);
/* ========================= Dictionary types =============================== */
uint64_t dictSdsHash(const void *key);
uint64_t dictSdsCaseHash(const void *key);
int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2);
void releaseSentinelRedisInstance(sentinelRedisInstance *ri);
void dictInstancesValDestructor (void *privdata, void *obj) {
@ -414,6 +423,16 @@ dictType leaderVotesDictType = {
NULL /* val destructor */
};
/* Instance renamed commands table. */
dictType renamedCommandsDictType = {
dictSdsCaseHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
dictSdsKeyCaseCompare, /* key compare */
dictSdsDestructor, /* key destructor */
dictSdsDestructor /* val destructor */
};
/* =========================== Initialization =============================== */
void sentinelCommand(client *c);
@ -468,6 +487,7 @@ void initSentinel(void) {
sentinel.announce_ip = NULL;
sentinel.announce_port = 0;
sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
sentinel.deny_scripts_reconfig = SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG;
memset(sentinel.myid,0,sizeof(sentinel.myid));
}
@ -494,7 +514,7 @@ void sentinelIsRunning(void) {
if (sentinel.myid[j] != 0) break;
if (j == CONFIG_RUN_ID_SIZE) {
/* Pick ID and presist the config. */
/* Pick ID and persist the config. */
getRandomHexChars(sentinel.myid,CONFIG_RUN_ID_SIZE);
sentinelFlushConfig();
}
@ -1207,6 +1227,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
ri->master = master;
ri->slaves = dictCreate(&instancesDictType,NULL);
ri->info_refresh = 0;
ri->renamed_commands = dictCreate(&renamedCommandsDictType,NULL);
/* Failover state. */
ri->leader = NULL;
@ -1254,6 +1275,7 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
sdsfree(ri->auth_pass);
sdsfree(ri->info);
releaseSentinelAddr(ri->addr);
dictRelease(ri->renamed_commands);
/* Clear state into the master if needed. */
if ((ri->flags & SRI_SLAVE) && (ri->flags & SRI_PROMOTED) && ri->master)
@ -1568,6 +1590,21 @@ char *sentinelGetInstanceTypeString(sentinelRedisInstance *ri) {
else return "unknown";
}
/* This function is used in order to send commands to Redis instances: the
* commands we send from Sentinel may be renamed, a common case is a master
* with CONFIG and SLAVEOF commands renamed for security concerns. In that
* case we check the ri->renamed_command table (or if the instance is a slave,
* we check the one of the master), and map the command that we should send
* to the set of renamed commads. However, if the command was not renamed,
* we just return "command" itself. */
char *sentinelInstanceMapCommand(sentinelRedisInstance *ri, char *command) {
sds sc = sdsnew(command);
if (ri->master) ri = ri->master;
char *retval = dictFetchValue(ri->renamed_commands, sc);
sdsfree(sc);
return retval ? retval : command;
}
/* ============================ Config handling ============================= */
char *sentinelHandleConfiguration(char **argv, int argc) {
sentinelRedisInstance *ri;
@ -1677,6 +1714,17 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
si->runid = sdsnew(argv[4]);
sentinelTryConnectionSharing(si);
}
} else if (!strcasecmp(argv[0],"rename-command") && argc == 4) {
/* rename-command <name> <command> <renamed-command> */
ri = sentinelGetMasterByName(argv[1]);
if (!ri) return "No such master with specified name.";
sds oldcmd = sdsnew(argv[2]);
sds newcmd = sdsnew(argv[3]);
if (dictAdd(ri->renamed_commands,oldcmd,newcmd) != DICT_OK) {
sdsfree(oldcmd);
sdsfree(newcmd);
return "Same command renamed multiple times with rename-command.";
}
} else if (!strcasecmp(argv[0],"announce-ip") && argc == 2) {
/* announce-ip <ip-address> */
if (strlen(argv[1]))
@ -1684,6 +1732,12 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
} else if (!strcasecmp(argv[0],"announce-port") && argc == 2) {
/* announce-port <port> */
sentinel.announce_port = atoi(argv[1]);
} else if (!strcasecmp(argv[0],"deny-scripts-reconfig") && argc == 2) {
/* deny-scripts-reconfig <yes|no> */
if ((sentinel.deny_scripts_reconfig = yesnotoi(argv[1])) == -1) {
return "Please specify yes or no for the "
"deny-scripts-reconfig options.";
}
} else {
return "Unrecognized sentinel configuration statement.";
}
@ -1704,6 +1758,12 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid);
rewriteConfigRewriteLine(state,"sentinel",line,1);
/* sentinel deny-scripts-reconfig. */
line = sdscatprintf(sdsempty(), "sentinel deny-scripts-reconfig %s",
sentinel.deny_scripts_reconfig ? "yes" : "no");
rewriteConfigRewriteLine(state,"sentinel",line,
sentinel.deny_scripts_reconfig != SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG);
/* For every master emit a "sentinel monitor" config entry. */
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
@ -1811,6 +1871,18 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
rewriteConfigRewriteLine(state,"sentinel",line,1);
}
dictReleaseIterator(di2);
/* sentinel rename-command */
di2 = dictGetIterator(master->renamed_commands);
while((de = dictNext(di2)) != NULL) {
sds oldname = dictGetKey(de);
sds newname = dictGetVal(de);
line = sdscatprintf(sdsempty(),
"sentinel rename-command %s %s %s",
master->name, oldname, newname);
rewriteConfigRewriteLine(state,"sentinel",line,1);
}
dictReleaseIterator(di2);
}
/* sentinel current-epoch is a global state valid for all the masters. */
@ -1875,7 +1947,8 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
ri->master->auth_pass;
if (auth_pass) {
if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "AUTH %s",
if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "%s %s",
sentinelInstanceMapCommand(ri,"AUTH"),
auth_pass) == C_OK) ri->link->pending_commands++;
}
}
@ -1891,7 +1964,9 @@ void sentinelSetClientName(sentinelRedisInstance *ri, redisAsyncContext *c, char
snprintf(name,sizeof(name),"sentinel-%.8s-%s",sentinel.myid,type);
if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri,
"CLIENT SETNAME %s", name) == C_OK)
"%s SETNAME %s",
sentinelInstanceMapCommand(ri,"CLIENT"),
name) == C_OK)
{
ri->link->pending_commands++;
}
@ -1953,8 +2028,9 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
sentinelSetClientName(ri,link->pc,"pubsub");
/* Now we subscribe to the Sentinels "Hello" channel. */
retval = redisAsyncCommand(link->pc,
sentinelReceiveHelloMessages, ri, "SUBSCRIBE %s",
SENTINEL_HELLO_CHANNEL);
sentinelReceiveHelloMessages, ri, "%s %s",
sentinelInstanceMapCommand(ri,"SUBSCRIBE"),
SENTINEL_HELLO_CHANNEL);
if (retval != C_OK) {
/* If we can't subscribe, the Pub/Sub connection is useless
* and we can simply disconnect it and try again. */
@ -2288,8 +2364,11 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
{
if (redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri,
"SCRIPT KILL") == C_OK)
"%s KILL",
sentinelInstanceMapCommand(ri,"SCRIPT")) == C_OK)
{
ri->link->pending_commands++;
}
ri->flags |= SRI_SCRIPT_KILL_SENT;
}
}
@ -2452,7 +2531,7 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
}
/* Send an "Hello" message via Pub/Sub to the specified 'ri' Redis
* instance in order to broadcast the current configuraiton for this
* instance in order to broadcast the current configuration for this
* master, and to advertise the existence of this Sentinel at the same time.
*
* The message has the following format:
@ -2495,8 +2574,9 @@ int sentinelSendHello(sentinelRedisInstance *ri) {
master->name,master_addr->ip,master_addr->port,
(unsigned long long) master->config_epoch);
retval = redisAsyncCommand(ri->link->cc,
sentinelPublishReplyCallback, ri, "PUBLISH %s %s",
SENTINEL_HELLO_CHANNEL,payload);
sentinelPublishReplyCallback, ri, "%s %s %s",
sentinelInstanceMapCommand(ri,"PUBLISH"),
SENTINEL_HELLO_CHANNEL,payload);
if (retval != C_OK) return C_ERR;
ri->link->pending_commands++;
return C_OK;
@ -2541,7 +2621,8 @@ int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master) {
* queued in the connection. */
int sentinelSendPing(sentinelRedisInstance *ri) {
int retval = redisAsyncCommand(ri->link->cc,
sentinelPingReplyCallback, ri, "PING");
sentinelPingReplyCallback, ri, "%s",
sentinelInstanceMapCommand(ri,"PING"));
if (retval == C_OK) {
ri->link->pending_commands++;
ri->link->last_ping_time = mstime();
@ -2605,7 +2686,8 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
(now - ri->info_refresh) > info_period))
{
retval = redisAsyncCommand(ri->link->cc,
sentinelInfoReplyCallback, ri, "INFO");
sentinelInfoReplyCallback, ri, "%s",
sentinelInstanceMapCommand(ri,"INFO"));
if (retval == C_OK) ri->link->pending_commands++;
}
@ -3099,7 +3181,7 @@ void sentinelCommand(client *c) {
addReplySds(c,e);
}
} else if (!strcasecmp(c->argv[1]->ptr,"set")) {
if (c->argc < 3 || c->argc % 2 == 0) goto numargserr;
if (c->argc < 3) goto numargserr;
sentinelSetCommand(c);
} else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
/* SENTINEL INFO-CACHE <name> */
@ -3275,7 +3357,7 @@ void sentinelInfoCommand(client *c) {
addReplyBulkSds(c, info);
}
/* Implements Sentinel verison of the ROLE command. The output is
/* Implements Sentinel version of the ROLE command. The output is
* "sentinel" and the list of currently monitored master names. */
void sentinelRoleCommand(client *c) {
dictIterator *di;
@ -3298,39 +3380,58 @@ void sentinelRoleCommand(client *c) {
void sentinelSetCommand(client *c) {
sentinelRedisInstance *ri;
int j, changes = 0;
char *option, *value;
int badarg = 0; /* Bad argument position for error reporting. */
char *option;
if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
== NULL) return;
/* Process option - value pairs. */
for (j = 3; j < c->argc; j += 2) {
for (j = 3; j < c->argc; j++) {
int moreargs = (c->argc-1) - j;
option = c->argv[j]->ptr;
value = c->argv[j+1]->ptr;
robj *o = c->argv[j+1];
long long ll;
int old_j = j; /* Used to know what to log as an event. */
if (!strcasecmp(option,"down-after-milliseconds")) {
if (!strcasecmp(option,"down-after-milliseconds") && moreargs > 0) {
/* down-after-millisecodns <milliseconds> */
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
robj *o = c->argv[++j];
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
badarg = j;
goto badfmt;
}
ri->down_after_period = ll;
sentinelPropagateDownAfterPeriod(ri);
changes++;
} else if (!strcasecmp(option,"failover-timeout")) {
} else if (!strcasecmp(option,"failover-timeout") && moreargs > 0) {
/* failover-timeout <milliseconds> */
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
robj *o = c->argv[++j];
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
badarg = j;
goto badfmt;
}
ri->failover_timeout = ll;
changes++;
} else if (!strcasecmp(option,"parallel-syncs")) {
} else if (!strcasecmp(option,"parallel-syncs") && moreargs > 0) {
/* parallel-syncs <milliseconds> */
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
robj *o = c->argv[++j];
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
badarg = j;
goto badfmt;
}
ri->parallel_syncs = ll;
changes++;
} else if (!strcasecmp(option,"notification-script")) {
} else if (!strcasecmp(option,"notification-script") && moreargs > 0) {
/* notification-script <path> */
char *value = c->argv[++j]->ptr;
if (sentinel.deny_scripts_reconfig) {
addReplyError(c,
"Reconfiguration of scripts path is denied for "
"security reasons. Check the deny-scripts-reconfig "
"configuration directive in your Sentinel configuration");
return;
}
if (strlen(value) && access(value,X_OK) == -1) {
addReplyError(c,
"Notification script seems non existing or non executable");
@ -3340,8 +3441,17 @@ void sentinelSetCommand(client *c) {
sdsfree(ri->notification_script);
ri->notification_script = strlen(value) ? sdsnew(value) : NULL;
changes++;
} else if (!strcasecmp(option,"client-reconfig-script")) {
} else if (!strcasecmp(option,"client-reconfig-script") && moreargs > 0) {
/* client-reconfig-script <path> */
char *value = c->argv[++j]->ptr;
if (sentinel.deny_scripts_reconfig) {
addReplyError(c,
"Reconfiguration of scripts path is denied for "
"security reasons. Check the deny-scripts-reconfig "
"configuration directive in your Sentinel configuration");
return;
}
if (strlen(value) && access(value,X_OK) == -1) {
addReplyError(c,
"Client reconfiguration script seems non existing or "
@ -3352,24 +3462,65 @@ void sentinelSetCommand(client *c) {
sdsfree(ri->client_reconfig_script);
ri->client_reconfig_script = strlen(value) ? sdsnew(value) : NULL;
changes++;
} else if (!strcasecmp(option,"auth-pass")) {
} else if (!strcasecmp(option,"auth-pass") && moreargs > 0) {
/* auth-pass <password> */
char *value = c->argv[++j]->ptr;
sdsfree(ri->auth_pass);
ri->auth_pass = strlen(value) ? sdsnew(value) : NULL;
changes++;
} else if (!strcasecmp(option,"quorum")) {
} else if (!strcasecmp(option,"quorum") && moreargs > 0) {
/* quorum <count> */
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
robj *o = c->argv[++j];
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
badarg = j;
goto badfmt;
}
ri->quorum = ll;
changes++;
} else if (!strcasecmp(option,"rename-command") && moreargs > 1) {
/* rename-command <oldname> <newname> */
sds oldname = c->argv[++j]->ptr;
sds newname = c->argv[++j]->ptr;
if ((sdslen(oldname) == 0) || (sdslen(newname) == 0)) {
badarg = sdslen(newname) ? j-1 : j;
goto badfmt;
}
/* Remove any older renaming for this command. */
dictDelete(ri->renamed_commands,oldname);
/* If the target name is the same as the source name there
* is no need to add an entry mapping to itself. */
if (!dictSdsKeyCaseCompare(NULL,oldname,newname)) {
oldname = sdsdup(oldname);
newname = sdsdup(newname);
dictAdd(ri->renamed_commands,oldname,newname);
}
changes++;
} else {
addReplyErrorFormat(c,"Unknown option '%s' for SENTINEL SET",
option);
addReplyErrorFormat(c,"Unknown option or number of arguments for "
"SENTINEL SET '%s'", option);
if (changes) sentinelFlushConfig();
return;
}
sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s",option,value);
/* Log the event. */
int numargs = j-old_j+1;
switch(numargs) {
case 2:
sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s",c->argv[old_j]->ptr,
c->argv[old_j+1]->ptr);
break;
case 3:
sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s %s",c->argv[old_j]->ptr,
c->argv[old_j+1]->ptr,
c->argv[old_j+2]->ptr);
break;
default:
sentinelEvent(LL_WARNING,"+set",ri,"%@ %s",c->argv[old_j]->ptr);
break;
}
}
if (changes) sentinelFlushConfig();
@ -3379,7 +3530,7 @@ void sentinelSetCommand(client *c) {
badfmt: /* Bad format errors */
if (changes) sentinelFlushConfig();
addReplyErrorFormat(c,"Invalid argument '%s' for SENTINEL SET '%s'",
value, option);
(char*)c->argv[badarg]->ptr,option);
}
/* Our fake PUBLISH command: it is actually useful only to receive hello messages
@ -3417,7 +3568,7 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
if (ri->link->cc &&
(mstime() - ri->link->cc_conn_time) >
SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
ri->link->act_ping_time != 0 && /* Ther is a pending ping... */
ri->link->act_ping_time != 0 && /* There is a pending ping... */
/* The pending ping is delayed, and we did not received
* error replies as well. */
(mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
@ -3585,7 +3736,8 @@ void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int f
ll2string(port,sizeof(port),master->addr->port);
retval = redisAsyncCommand(ri->link->cc,
sentinelReceiveIsMasterDownReply, ri,
"SENTINEL is-master-down-by-addr %s %s %llu %s",
"%s is-master-down-by-addr %s %s %llu %s",
sentinelInstanceMapCommand(ri,"SENTINEL"),
master->addr->ip, port,
sentinel.current_epoch,
(master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
@ -3605,7 +3757,7 @@ void sentinelSimFailureCrash(void) {
}
/* Vote for the sentinel with 'req_runid' or return the old vote if already
* voted for the specifed 'req_epoch' or one greater.
* voted for the specified 'req_epoch' or one greater.
*
* If a vote is not available returns NULL, otherwise return the Sentinel
* runid and populate the leader_epoch with the epoch of the vote. */
@ -3756,7 +3908,7 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
/* In order to send SLAVEOF in a safe way, we send a transaction performing
* the following tasks:
* 1) Reconfigure the instance according to the specified host/port params.
* 2) Rewrite the configuraiton.
* 2) Rewrite the configuration.
* 3) Disconnect all clients (but this one sending the commnad) in order
* to trigger the ask-master-on-reconnection protocol for connected
* clients.
@ -3764,17 +3916,21 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
* Note that we don't check the replies returned by commands, since we
* will observe instead the effects in the next INFO output. */
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "MULTI");
sentinelDiscardReplyCallback, ri, "%s",
sentinelInstanceMapCommand(ri,"MULTI"));
if (retval == C_ERR) return retval;
ri->link->pending_commands++;
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "SLAVEOF %s %s", host, portstr);
sentinelDiscardReplyCallback, ri, "%s %s %s",
sentinelInstanceMapCommand(ri,"SLAVEOF"),
host, portstr);
if (retval == C_ERR) return retval;
ri->link->pending_commands++;
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "CONFIG REWRITE");
sentinelDiscardReplyCallback, ri, "%s REWRITE",
sentinelInstanceMapCommand(ri,"CONFIG"));
if (retval == C_ERR) return retval;
ri->link->pending_commands++;
@ -3784,12 +3940,14 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
* recognized as a syntax error, and the transaction will not fail (but
* only the unsupported command will fail). */
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "CLIENT KILL TYPE normal");
sentinelDiscardReplyCallback, ri, "%s KILL TYPE normal",
sentinelInstanceMapCommand(ri,"CLIENT"));
if (retval == C_ERR) return retval;
ri->link->pending_commands++;
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "EXEC");
sentinelDiscardReplyCallback, ri, "%s",
sentinelInstanceMapCommand(ri,"EXEC"));
if (retval == C_ERR) return retval;
ri->link->pending_commands++;

View File

@ -198,8 +198,8 @@ struct redisCommand redisCommandTable[] = {
{"zrank",zrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zrevrank",zrevrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zscan",zscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
{"zpopmin",zpopminCommand,-2,"wF",0,NULL,1,-1,1,0,0},
{"zpopmax",zpopmaxCommand,-2,"wF",0,NULL,1,-1,1,0,0},
{"zpopmin",zpopminCommand,-2,"wF",0,NULL,1,1,1,0,0},
{"zpopmax",zpopmaxCommand,-2,"wF",0,NULL,1,1,1,0,0},
{"bzpopmin",bzpopminCommand,-2,"wsF",0,NULL,1,-2,1,0,0},
{"bzpopmax",bzpopmaxCommand,-2,"wsF",0,NULL,1,-2,1,0,0},
{"hset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0},
@ -326,6 +326,10 @@ struct redisCommand redisCommandTable[] = {
/*============================ Utility functions ============================ */
/* We use a private localtime implementation which is fork-safe. The logging
* function of Redis may be called from other threads. */
void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst);
/* Low level logging. To use only for very big messages, otherwise
* serverLog() is to prefer. */
void serverLogRaw(int level, const char *msg) {
@ -351,7 +355,9 @@ void serverLogRaw(int level, const char *msg) {
pid_t pid = getpid();
gettimeofday(&tv,NULL);
off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec));
struct tm tm;
nolocks_localtime(&tm,tv.tv_sec,server.timezone,server.daylight_active);
off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",&tm);
snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
if (server.sentinel_mode) {
role_char = 'X'; /* Sentinel. */
@ -859,15 +865,102 @@ int clientsCronResizeQueryBuffer(client *c) {
/* Reset the peak again to capture the peak memory usage in the next
* cycle. */
c->querybuf_peak = 0;
/* Clients representing masters also use a "pending query buffer" that
* is the yet not applied part of the stream we are reading. Such buffer
* also needs resizing from time to time, otherwise after a very large
* transfer (a huge value or a big MIGRATE operation) it will keep using
* a lot of memory. */
if (c->flags & CLIENT_MASTER) {
/* There are two conditions to resize the pending query buffer:
* 1) Pending Query buffer is > LIMIT_PENDING_QUERYBUF.
* 2) Used length is smaller than pending_querybuf_size/2 */
size_t pending_querybuf_size = sdsAllocSize(c->pending_querybuf);
if(pending_querybuf_size > LIMIT_PENDING_QUERYBUF &&
sdslen(c->pending_querybuf) < (pending_querybuf_size/2))
{
c->pending_querybuf = sdsRemoveFreeSpace(c->pending_querybuf);
}
}
return 0;
}
/* This function is used in order to track clients using the biggest amount
* of memory in the latest few seconds. This way we can provide such information
* in the INFO output (clients section), without having to do an O(N) scan for
* all the clients.
*
* This is how it works. We have an array of CLIENTS_PEAK_MEM_USAGE_SLOTS slots
* where we track, for each, the biggest client output and input buffers we
* saw in that slot. Every slot correspond to one of the latest seconds, since
* the array is indexed by doing UNIXTIME % CLIENTS_PEAK_MEM_USAGE_SLOTS.
*
* When we want to know what was recently the peak memory usage, we just scan
* such few slots searching for the maximum value. */
#define CLIENTS_PEAK_MEM_USAGE_SLOTS 8
size_t ClientsPeakMemInput[CLIENTS_PEAK_MEM_USAGE_SLOTS];
size_t ClientsPeakMemOutput[CLIENTS_PEAK_MEM_USAGE_SLOTS];
int clientsCronTrackExpansiveClients(client *c) {
size_t in_usage = sdsAllocSize(c->querybuf);
size_t out_usage = getClientOutputBufferMemoryUsage(c);
int i = server.unixtime % CLIENTS_PEAK_MEM_USAGE_SLOTS;
int zeroidx = (i+1) % CLIENTS_PEAK_MEM_USAGE_SLOTS;
/* Always zero the next sample, so that when we switch to that second, we'll
* only register samples that are greater in that second without considering
* the history of such slot.
*
* Note: our index may jump to any random position if serverCron() is not
* called for some reason with the normal frequency, for instance because
* some slow command is called taking multiple seconds to execute. In that
* case our array may end containing data which is potentially older
* than CLIENTS_PEAK_MEM_USAGE_SLOTS seconds: however this is not a problem
* since here we want just to track if "recently" there were very expansive
* clients from the POV of memory usage. */
ClientsPeakMemInput[zeroidx] = 0;
ClientsPeakMemOutput[zeroidx] = 0;
/* Track the biggest values observed so far in this slot. */
if (in_usage > ClientsPeakMemInput[i]) ClientsPeakMemInput[i] = in_usage;
if (out_usage > ClientsPeakMemOutput[i]) ClientsPeakMemOutput[i] = out_usage;
return 0; /* This function never terminates the client. */
}
/* Return the max samples in the memory usage of clients tracked by
* the function clientsCronTrackExpansiveClients(). */
void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
size_t i = 0, o = 0;
for (int j = 0; j < CLIENTS_PEAK_MEM_USAGE_SLOTS; j++) {
if (ClientsPeakMemInput[j] > i) i = ClientsPeakMemInput[j];
if (ClientsPeakMemOutput[j] > o) o = ClientsPeakMemOutput[j];
}
*in_usage = i;
*out_usage = o;
}
/* This function is called by serverCron() and is used in order to perform
* operations on clients that are important to perform constantly. For instance
* we use this function in order to disconnect clients after a timeout, including
* clients blocked in some blocking command with a non-zero timeout.
*
* The function makes some effort to process all the clients every second, even
* if this cannot be strictly guaranteed, since serverCron() may be called with
* an actual frequency lower than server.hz in case of latency events like slow
* commands.
*
* It is very important for this function, and the functions it calls, to be
* very fast: sometimes Redis has tens of hundreds of connected clients, and the
* default server.hz value is 10, so sometimes here we need to process thousands
* of clients per second, turning this function into a source of latency.
*/
#define CLIENTS_CRON_MIN_ITERATIONS 5
void clientsCron(void) {
/* Make sure to process at least numclients/server.hz of clients
* per call. Since this function is called server.hz times per second
* we are sure that in the worst case we process all the clients in 1
* second. */
/* Try to process at least numclients/server.hz of clients
* per call. Since normally (if there are no big latency events) this
* function is called server.hz times per second, in the average case we
* process all the clients in 1 second. */
int numclients = listLength(server.clients);
int iterations = numclients/server.hz;
mstime_t now = mstime();
@ -894,6 +987,7 @@ void clientsCron(void) {
* terminated. */
if (clientsCronHandleTimeout(c,now)) continue;
if (clientsCronResizeQueryBuffer(c)) continue;
if (clientsCronTrackExpansiveClients(c)) continue;
}
}
@ -960,6 +1054,14 @@ void updateCachedTime(void) {
time_t unixtime = time(NULL);
atomicSet(server.unixtime,unixtime);
server.mstime = mstime();
/* To get information about daylight saving time, we need to call localtime_r
* and cache the result. However calling localtime_r in this context is safe
* since we will never fork() while here, in the main thread. The logging
* function will call a thread safe version of localtime that has no locks. */
struct tm tm;
localtime_r(&server.unixtime,&tm);
server.daylight_active = tm.tm_isdst;
}
/* This is our timer interrupt, called server.hz times per second.
@ -1134,7 +1236,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
} else {
/* If there is not a background saving/rewrite in progress check if
* we have to save/rewrite now. */
for (j = 0; j < server.saveparamslen; j++) {
for (j = 0; j < server.saveparamslen; j++) {
struct saveparam *sp = server.saveparams+j;
/* Save if we reached the given amount of changes,
@ -1154,23 +1256,23 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
rdbSaveBackground(server.rdb_filename,rsiptr);
break;
}
}
}
/* Trigger an AOF rewrite if needed. */
if (server.aof_state == AOF_ON &&
server.rdb_child_pid == -1 &&
server.aof_child_pid == -1 &&
server.aof_rewrite_perc &&
server.aof_current_size > server.aof_rewrite_min_size)
{
/* Trigger an AOF rewrite if needed. */
if (server.aof_state == AOF_ON &&
server.rdb_child_pid == -1 &&
server.aof_child_pid == -1 &&
server.aof_rewrite_perc &&
server.aof_current_size > server.aof_rewrite_min_size)
{
long long base = server.aof_rewrite_base_size ?
server.aof_rewrite_base_size : 1;
server.aof_rewrite_base_size : 1;
long long growth = (server.aof_current_size*100/base) - 100;
if (growth >= server.aof_rewrite_perc) {
serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
rewriteAppendOnlyFileBackground();
}
}
}
}
@ -1402,10 +1504,12 @@ void initServerConfig(void) {
pthread_mutex_init(&server.lruclock_mutex,NULL);
pthread_mutex_init(&server.unixtime_mutex,NULL);
updateCachedTime();
getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE);
server.runid[CONFIG_RUN_ID_SIZE] = '\0';
changeReplicationId();
clearReplicationId2();
server.timezone = timezone; /* Initialized by tzset(). */
server.configfile = NULL;
server.executable = NULL;
server.hz = CONFIG_DEFAULT_HZ;
@ -1457,6 +1561,7 @@ void initServerConfig(void) {
server.aof_selected_db = -1; /* Make sure the first time will not match */
server.aof_flush_postponed_start = 0;
server.aof_rewrite_incremental_fsync = CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
server.rdb_save_incremental_fsync = CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC;
server.aof_load_truncated = CONFIG_DEFAULT_AOF_LOAD_TRUNCATED;
server.aof_use_rdb_preamble = CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE;
server.pidfile = NULL;
@ -1889,6 +1994,7 @@ void initServer(void) {
server.pid = getpid();
server.current_client = NULL;
server.clients = listCreate();
server.clients_index = raxNew();
server.clients_to_close = listCreate();
server.slaves = listCreate();
server.monitors = listCreate();
@ -1981,7 +2087,6 @@ void initServer(void) {
server.aof_last_write_status = C_OK;
server.aof_last_write_errno = 0;
server.repl_good_slaves_count = 0;
updateCachedTime();
/* Create the timer callback, this is our way to process many background
* operations incrementally, like clients timeout, eviction of unaccessed
@ -2345,7 +2450,7 @@ void call(client *c, int flags) {
if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
/* However prevent AOF / replication propagation if the command
* implementatino called preventCommandPropagation() or similar,
* implementations called preventCommandPropagation() or similar,
* or if we don't have the call() flags to do so. */
if (c->flags & CLIENT_PREVENT_REPL_PROP ||
!(flags & CMD_CALL_PROPAGATE_REPL))
@ -2415,8 +2520,13 @@ int processCommand(client *c) {
c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
if (!c->cmd) {
flagTransaction(c);
addReplyErrorFormat(c,"unknown command '%s'",
(char*)c->argv[0]->ptr);
sds args = sdsempty();
int i;
for (i=1; i < c->argc && sdslen(args) < 128; i++)
args = sdscatprintf(args, "`%.*s`, ", 128-(int)sdslen(args), (char*)c->argv[i]->ptr);
addReplyErrorFormat(c,"unknown command `%s`, with args beginning with: %s",
(char*)c->argv[0]->ptr, args);
sdsfree(args);
return C_OK;
} else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
(c->argc < -c->cmd->arity)) {
@ -2485,7 +2595,8 @@ int processCommand(client *c) {
if (((server.stop_writes_on_bgsave_err &&
server.saveparamslen > 0 &&
server.lastbgsave_status == C_ERR) ||
server.aof_last_write_status == C_ERR) &&
(server.aof_state != AOF_OFF &&
server.aof_last_write_status == C_ERR)) &&
server.masterhost == NULL &&
(c->cmd->flags & CMD_WRITE ||
c->cmd->proc == pingCommand))
@ -2638,7 +2749,7 @@ int prepareForShutdown(int flags) {
/* Append only file: flush buffers and fsync() the AOF at exit */
serverLog(LL_NOTICE,"Calling fsync() on the AOF file.");
flushAppendOnlyFile(1);
aof_fsync(server.aof_fd);
redis_fsync(server.aof_fd);
}
/* Create a new RDB file before exiting. */
@ -2827,9 +2938,9 @@ void commandCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"(no subcommand) -- Return details about all Redis commands.",
"count -- Return the total number of commands in this Redis server.",
"getkeys <full-command> -- Return the keys from a full Redis command.",
"info [command-name ...] -- Return details about multiple Redis commands.",
"COUNT -- Return the total number of commands in this Redis server.",
"GETKEYS <full-command> -- Return the keys from a full Redis command.",
"INFO [command-name ...] -- Return details about multiple Redis commands.",
NULL
};
addReplyHelp(c, help);
@ -2853,7 +2964,10 @@ NULL
int *keys, numkeys, j;
if (!cmd) {
addReplyErrorFormat(c,"Invalid command specified");
addReplyError(c,"Invalid command specified");
return;
} else if (cmd->getkeys_proc == NULL && cmd->firstkey == 0) {
addReplyError(c,"The command has no key arguments");
return;
} else if ((cmd->arity > 0 && cmd->arity != c->argc-2) ||
((c->argc-2) < -cmd->arity))
@ -2863,11 +2977,15 @@ NULL
}
keys = getKeysFromCommand(cmd,c->argv+2,c->argc-2,&numkeys);
addReplyMultiBulkLen(c,numkeys);
for (j = 0; j < numkeys; j++) addReplyBulk(c,c->argv[keys[j]+2]);
getKeysFreeResult(keys);
if (!keys) {
addReplyError(c,"Invalid arguments specified for command");
} else {
addReplyMultiBulkLen(c,numkeys);
for (j = 0; j < numkeys; j++) addReplyBulk(c,c->argv[keys[j]+2]);
getKeysFreeResult(keys);
}
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try COMMAND HELP", (char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}
@ -2879,7 +2997,6 @@ void bytesToHuman(char *s, unsigned long long n) {
if (n < 1024) {
/* Bytes */
sprintf(s,"%lluB",n);
return;
} else if (n < (1024*1024)) {
d = (double)n/(1024);
sprintf(s,"%.2fK",d);
@ -2909,7 +3026,6 @@ sds genRedisInfoString(char *section) {
time_t uptime = server.unixtime-server.stat_starttime;
int j;
struct rusage self_ru, c_ru;
unsigned long lol, bib;
int allsections = 0, defsections = 0;
int sections = 0;
@ -2919,7 +3035,6 @@ sds genRedisInfoString(char *section) {
getrusage(RUSAGE_SELF, &self_ru);
getrusage(RUSAGE_CHILDREN, &c_ru);
getClientsMaxBuffers(&lol,&bib);
/* Server */
if (allsections || defsections || !strcasecmp(section,"server")) {
@ -2989,15 +3104,17 @@ sds genRedisInfoString(char *section) {
/* Clients */
if (allsections || defsections || !strcasecmp(section,"clients")) {
size_t maxin, maxout;
getExpansiveClientsInfo(&maxin,&maxout);
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Clients\r\n"
"connected_clients:%lu\r\n"
"client_longest_output_list:%lu\r\n"
"client_biggest_input_buf:%lu\r\n"
"client_recent_max_input_buffer:%zu\r\n"
"client_recent_max_output_buffer:%zu\r\n"
"blocked_clients:%d\r\n",
listLength(server.clients)-listLength(server.slaves),
lol, bib,
maxin, maxout,
server.blocked_clients);
}
@ -3061,6 +3178,11 @@ sds genRedisInfoString(char *section) {
"rss_overhead_bytes:%zu\r\n"
"mem_fragmentation_ratio:%.2f\r\n"
"mem_fragmentation_bytes:%zu\r\n"
"mem_not_counted_for_evict:%zu\r\n"
"mem_replication_backlog:%zu\r\n"
"mem_clients_slaves:%zu\r\n"
"mem_clients_normal:%zu\r\n"
"mem_aof_buffer:%zu\r\n"
"mem_allocator:%s\r\n"
"active_defrag_running:%d\r\n"
"lazyfree_pending_objects:%zu\r\n",
@ -3093,6 +3215,11 @@ sds genRedisInfoString(char *section) {
mh->rss_extra_bytes,
mh->total_frag, /* this is the total RSS overhead, including fragmentation, */
mh->total_frag_bytes, /* named so for backwards compatibility */
freeMemoryGetNotCountedMemory(),
mh->repl_backlog,
mh->clients_slaves,
mh->clients_normal,
mh->aof_buffer,
ZMALLOC_LIB,
server.active_defrag_running,
lazyfreeGetPendingObjectsCount()
@ -3835,9 +3962,11 @@ int main(int argc, char **argv) {
spt_init(argc, argv);
#endif
setlocale(LC_COLLATE,"");
tzset(); /* Populates 'timezone' global. */
zmalloc_set_oom_handler(redisOutOfMemoryHandler);
srand(time(NULL)^getpid());
gettimeofday(&tv,NULL);
char hashseed[16];
getRandomHexChars(hashseed,sizeof(hashseed));
dictSetHashFunctionSeed((uint8_t*)hashseed);
@ -3894,7 +4023,7 @@ int main(int argc, char **argv) {
configfile = argv[j];
server.configfile = getAbsolutePath(configfile);
/* Replace the config file in server.exec_argv with
* its absoulte path. */
* its absolute path. */
zfree(server.exec_argv[j]);
server.exec_argv[j] = zstrdup(server.configfile);
j++;

View File

@ -142,6 +142,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE 1
#define CONFIG_DEFAULT_ACTIVE_REHASHING 1
#define CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1
#define CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC 1
#define CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE 0
#define CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG 10
#define NET_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46, but we need to be sure */
@ -183,7 +184,9 @@ typedef long long mstime_t; /* millisecond time type. */
#define PROTO_INLINE_MAX_SIZE (1024*64) /* Max size of inline reads */
#define PROTO_MBULK_BIG_ARG (1024*32)
#define LONG_STR_SIZE 21 /* Bytes needed for long -> str + '\0' */
#define AOF_AUTOSYNC_BYTES (1024*1024*32) /* fdatasync every 32MB */
#define REDIS_AUTOSYNC_BYTES (1024*1024*32) /* fdatasync every 32MB */
#define LIMIT_PENDING_QUERYBUF (4*1024*1024) /* 4mb */
/* When configuring the server eventloop, we setup it so that the total number
* of file descriptors we can handle are server.maxclients + RESERVED_FDS +
@ -616,6 +619,13 @@ typedef struct redisObject {
struct evictionPoolEntry; /* Defined in evict.c */
/* This structure is used in order to represent the output buffer of a client,
* which is actually a linked list of blocks like that, that is: client->reply. */
typedef struct clientReplyBlock {
size_t size, used;
char buf[];
} clientReplyBlock;
/* Redis database representation. There are multiple databases identified
* by integers from 0 (the default database) up to the max configured
* database. The database number is the 'id' field in the structure. */
@ -662,6 +672,7 @@ typedef struct blockingState {
robj *xread_group; /* XREADGROUP group name. */
robj *xread_consumer; /* XREADGROUP consumer name. */
mstime_t xread_retry_time, xread_retry_ttl;
int xread_group_noack;
/* BLOCKED_WAIT */
int numreplicas; /* Number of replicas we are waiting for ACK. */
@ -697,9 +708,10 @@ typedef struct client {
redisDb *db; /* Pointer to currently SELECTed DB. */
robj *name; /* As set by CLIENT SETNAME. */
sds querybuf; /* Buffer we use to accumulate client queries. */
sds pending_querybuf; /* If this is a master, this buffer represents the
yet not applied replication stream that we
are receiving from the master. */
sds pending_querybuf; /* If this client is flagged as master, this buffer
represents the yet not applied portion of the
replication stream that we are receiving from
the master. */
size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */
int argc; /* Num of arguments of current command. */
robj **argv; /* Arguments of current command. */
@ -881,13 +893,13 @@ typedef struct rdbSaveInfo {
#define RDB_SAVE_INFO_INIT {-1,0,"000000000000000000000000000000",-1}
typedef struct malloc_stats {
struct malloc_stats {
size_t zmalloc_used;
size_t process_rss;
size_t allocator_allocated;
size_t allocator_active;
size_t allocator_resident;
} malloc_stats;
};
/*-----------------------------------------------------------------------------
* Global server state
@ -951,6 +963,7 @@ struct redisServer {
list *clients_pending_write; /* There is to write or install handler. */
list *slaves, *monitors; /* List of slaves and MONITORs */
client *current_client; /* Current client, only used on crash report */
rax *clients_index; /* Active clients dictionary by client ID. */
int clients_paused; /* True if clients are currently paused */
mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
@ -994,7 +1007,7 @@ struct redisServer {
long long slowlog_entry_id; /* SLOWLOG current entry ID */
long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
malloc_stats cron_malloc_stats; /* sampled in serverCron(). */
struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */
long long stat_net_input_bytes; /* Bytes read from network. */
long long stat_net_output_bytes; /* Bytes written to network. */
size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */
@ -1046,7 +1059,8 @@ struct redisServer {
time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
int aof_lastbgrewrite_status; /* C_OK or C_ERR */
unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
int aof_rewrite_incremental_fsync;/* fsync incrementally while rewriting? */
int aof_rewrite_incremental_fsync;/* fsync incrementally while aof rewriting? */
int rdb_save_incremental_fsync; /* fsync incrementally while rdb saving? */
int aof_last_write_status; /* C_OK or C_ERR */
int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */
int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */
@ -1186,6 +1200,8 @@ struct redisServer {
int list_compress_depth;
/* time cache */
time_t unixtime; /* Unix time sampled every cron cycle. */
time_t timezone; /* Cached timezone. As set by tzset(). */
int daylight_active; /* Currently in daylight saving time. */
long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
/* Pubsub */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
@ -1410,15 +1426,17 @@ void addReplyHumanLongDouble(client *c, long double d);
void addReplyLongLong(client *c, long long ll);
void addReplyMultiBulkLen(client *c, long length);
void addReplyHelp(client *c, const char **help);
void addReplySubcommandSyntaxError(client *c);
void copyClientOutputBuffer(client *dst, client *src);
size_t sdsZmallocSize(sds s);
size_t getStringObjectSdsUsedMemory(robj *o);
void freeClientReplyValue(void *o);
void *dupClientReplyValue(void *o);
void getClientsMaxBuffers(unsigned long *longest_output_list,
unsigned long *biggest_input_buffer);
char *getClientPeerId(client *client);
sds catClientInfoString(sds s, client *client);
sds getAllClientsInfoString(void);
sds getAllClientsInfoString(int type);
void rewriteClientCommandVector(client *c, int argc, ...);
void rewriteClientCommandArgument(client *c, int i, robj *newval);
void replaceClientCommandVector(client *c, int argc, robj **argv);
@ -1499,6 +1517,7 @@ robj *tryObjectEncoding(robj *o);
robj *getDecodedObject(robj *o);
size_t stringObjectLen(robj *o);
robj *createStringObjectFromLongLong(long long value);
robj *createStringObjectFromLongLongForValue(long long value);
robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
robj *createQuicklistObject(void);
robj *createZiplistObject(void);
@ -1593,11 +1612,11 @@ void receiveChildInfo(void);
#define ZADD_NONE 0
#define ZADD_INCR (1<<0) /* Increment the score instead of setting it. */
#define ZADD_NX (1<<1) /* Don't touch elements not already existing. */
#define ZADD_XX (1<<2) /* Only touch elements already exisitng. */
#define ZADD_XX (1<<2) /* Only touch elements already existing. */
/* Output flags. */
#define ZADD_NOP (1<<3) /* Operation not performed because of conditionals.*/
#define ZADD_NAN (1<<4) /* Only touch elements already exisitng. */
#define ZADD_NAN (1<<4) /* Only touch elements already existing. */
#define ZADD_ADDED (1<<5) /* The element was new and was added. */
#define ZADD_UPDATED (1<<6) /* The element already existed, score updated. */
@ -1653,6 +1672,7 @@ int zslLexValueLteMax(sds value, zlexrangespec *spec);
/* Core functions */
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level);
size_t freeMemoryGetNotCountedMemory();
int freeMemoryIfNeeded(void);
int processCommand(client *c);
void setupSignalHandlers(void);
@ -1769,6 +1789,8 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply);
robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags);
robj *objectCommandLookup(client *c, robj *key);
robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply);
void objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
long long lru_clock);
#define LOOKUP_NONE 0
#define LOOKUP_NOTOUCH (1<<0)
void dbAdd(redisDb *db, robj *key, robj *val);

View File

@ -142,11 +142,11 @@ void slowlogReset(void) {
void slowlogCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"get [count] -- Return top entries from the slowlog (default: 10)."
"GET [count] -- Return top entries from the slowlog (default: 10)."
" Entries are made of:",
" id, timestamp, time in microseconds, arguments array, client IP and port, client name",
"len -- Return the length of the slowlog.",
"reset -- Reset the slowlog.",
"LEN -- Return the length of the slowlog.",
"RESET -- Reset the slowlog.",
NULL
};
addReplyHelp(c, help);
@ -187,6 +187,6 @@ NULL
}
setDeferredMultiBulkLength(c,totentries,sent);
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try SLOWLOG HELP", (char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}

View File

@ -193,7 +193,7 @@ void sortCommand(client *c) {
long limit_start = 0, limit_count = -1, start, end;
int j, dontsort = 0, vectorlen;
int getop = 0; /* GET operation counter */
int int_convertion_error = 0;
int int_conversion_error = 0;
int syntax_error = 0;
robj *sortval, *sortby = NULL, *storekey = NULL;
redisSortObject *vector; /* Resulting vector to sort */
@ -447,7 +447,7 @@ void sortCommand(client *c) {
serverAssertWithInfo(c,sortval,j == vectorlen);
/* Now it's time to load the right scores in the sorting vector */
if (dontsort == 0) {
if (!dontsort) {
for (j = 0; j < vectorlen; j++) {
robj *byval;
if (sortby) {
@ -469,7 +469,7 @@ void sortCommand(client *c) {
if (eptr[0] != '\0' || errno == ERANGE ||
isnan(vector[j].u.score))
{
int_convertion_error = 1;
int_conversion_error = 1;
}
} else if (byval->encoding == OBJ_ENCODING_INT) {
/* Don't need to decode the object if it's
@ -487,9 +487,7 @@ void sortCommand(client *c) {
decrRefCount(byval);
}
}
}
if (dontsort == 0) {
server.sort_desc = desc;
server.sort_alpha = alpha;
server.sort_bypattern = sortby ? 1 : 0;
@ -503,7 +501,7 @@ void sortCommand(client *c) {
/* Send command output to the output buffer, performing the specified
* GET/DEL/INCR/DECR operations if any. */
outputlen = getop ? getop*(end-start+1) : end-start+1;
if (int_convertion_error) {
if (int_conversion_error) {
addReplyError(c,"One or more scores can't be converted into double");
} else if (storekey == NULL) {
/* STORE option not specified, sent the sorting result to client */

View File

@ -108,5 +108,6 @@ streamConsumer *streamLookupConsumer(streamCG *cg, sds name, int create);
streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id);
streamNACK *streamCreateNACK(streamConsumer *consumer);
void streamDecodeID(void *buf, streamID *id);
int streamCompareID(streamID *a, streamID *b);
#endif

View File

@ -207,7 +207,7 @@ int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_
/* Create a new listpack and radix tree node if needed. Note that when
* a new listpack is created, we populate it with a "master entry". This
* is just a set of fields that is taken as refernce in order to compress
* is just a set of fields that is taken as references in order to compress
* the stream entries that we'll add inside the listpack.
*
* Note that while we use the first added entry fields to create
@ -469,7 +469,7 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) {
* iteration is from the start to the end element (inclusive), otherwise
* if rev is non-zero, the iteration is reversed.
*
* Once the iterator is initalized, we iterate like this:
* Once the iterator is initialized, we iterate like this:
*
* streamIterator myiterator;
* streamIteratorStart(&myiterator,...);
@ -705,10 +705,22 @@ void streamIteratorRemoveEntry(streamIterator *si, streamID *current) {
/* Change the valid/deleted entries count in the master entry. */
unsigned char *p = lpFirst(lp);
aux = lpGetInteger(p);
lp = lpReplaceInteger(lp,&p,aux-1);
p = lpNext(lp,p); /* Seek deleted field. */
aux = lpGetInteger(p);
lp = lpReplaceInteger(lp,&p,aux+1);
if (aux == 1) {
/* If this is the last element in the listpack, we can remove the whole
* node. */
lpFree(lp);
raxRemove(si->stream->rax,si->ri.key,si->ri.key_len,NULL);
} else {
/* In the base case we alter the counters of valid/deleted entries. */
lp = lpReplaceInteger(lp,&p,aux-1);
p = lpNext(lp,p); /* Seek deleted field. */
aux = lpGetInteger(p);
lp = lpReplaceInteger(lp,&p,aux+1);
}
/* Update the number of entries counter. */
si->stream->length--;
/* Update the number of entries counter. */
si->stream->length--;
@ -834,7 +846,7 @@ void streamPropagateXCLAIM(client *c, robj *key, robj *group, robj *id, streamNA
* given, but currently such a feature is never used by the code base that
* will always pass 'spi' and propagate when a group is passed.
*
* Note that this function is recursive in certian cases. When it's called
* Note that this function is recursive in certain cases. When it's called
* with a non NULL group and consumer argument, it may call
* streamReplyWithRangeFromConsumerPEL() in order to get entries from the
* consumer pending entries list. However such a function will then call
@ -905,24 +917,26 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
* will not require extra lookups. We'll fix the problem later
* if we find that there is already a entry for this ID. */
streamNACK *nack = streamCreateNACK(consumer);
int retval = 0;
retval += raxTryInsert(group->pel,buf,sizeof(buf),nack,NULL);
retval += raxTryInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
int group_inserted =
raxTryInsert(group->pel,buf,sizeof(buf),nack,NULL);
int consumer_inserted =
raxTryInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
/* Now we can check if the entry was already busy, and
* in that case reassign the entry to the new consumer. */
if (retval == 0) {
* in that case reassign the entry to the new consumer,
* or update it if the consumer is the same as before. */
if (group_inserted == 0) {
streamFreeNACK(nack);
nack = raxFind(group->pel,buf,sizeof(buf));
serverAssert(nack != raxNotFound);
raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL);
/* Update the consumer and idle time. */
/* Update the consumer and NACK metadata. */
nack->consumer = consumer;
nack->delivery_time = mstime();
nack->delivery_count++;
nack->delivery_count = 1;
/* Add the entry in the new consumer local PEL. */
raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
} else if (retval == 1) {
} else if (group_inserted == 1 && consumer_inserted == 0) {
serverPanic("NACK half-created. Should not be possible.");
}
@ -945,7 +959,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
/* This is an helper function for streamReplyWithRange() when called with
* group and consumer arguments, but with a range that is referring to already
* delivered messages. In this case we just emit messages that are already
* in the history of the conusmer, fetching the IDs from its PEL.
* in the history of the consumer, fetching the IDs from its PEL.
*
* Note that this function does not have a 'rev' argument because it's not
* possible to iterate in reverse using a group. Basically this function
@ -1081,7 +1095,7 @@ invalid:
void xaddCommand(client *c) {
streamID id;
int id_given = 0; /* Was an ID different than "*" specified? */
long long maxlen = 0; /* 0 means no maximum length. */
long long maxlen = -1; /* If left to -1 no trimming is performed. */
int approx_maxlen = 0; /* If 1 only delete whole radix tree nodes, so
the maxium length is not applied verbatim. */
int maxlen_arg_idx = 0; /* Index of the count in MAXLEN, for rewriting. */
@ -1105,6 +1119,11 @@ void xaddCommand(client *c) {
}
if (getLongLongFromObjectOrReply(c,c->argv[i+1],&maxlen,NULL)
!= C_OK) return;
if (maxlen < 0) {
addReplyError(c,"The MAXLEN argument must be >= 0.");
return;
}
i++;
maxlen_arg_idx = i;
} else {
@ -1144,7 +1163,7 @@ void xaddCommand(client *c) {
server.dirty++;
/* Remove older elements if MAXLEN was specified. */
if (maxlen) {
if (maxlen >= 0) {
if (!streamTrimByLength(s,maxlen,approx_maxlen)) {
/* If no trimming was performed, for instance because approximated
* trimming length was specified, rewrite the MAXLEN argument
@ -1335,6 +1354,14 @@ void xreadCommand(client *c) {
}
if (strcmp(c->argv[i]->ptr,"$") == 0) {
if (xreadgroup) {
addReplyError(c,"The $ ID is meaningless in the context of "
"XREADGROUP: you want to read the history of "
"this consumer by specifying a proper ID, or "
"use the > ID to get new messages. The $ ID would "
"just return an empty result set.");
goto cleanup;
}
if (o) {
stream *s = o->ptr;
ids[id_idx] = s->last_id;
@ -1344,13 +1371,17 @@ void xreadCommand(client *c) {
}
continue;
} else if (strcmp(c->argv[i]->ptr,">") == 0) {
if (!xreadgroup || groupname == NULL) {
if (!xreadgroup) {
addReplyError(c,"The > ID can be specified only when calling "
"XREADGROUP using the GROUP <group> "
"<consumer> option.");
goto cleanup;
}
ids[id_idx] = group->last_id;
/* We use just the maximum ID to signal this is a ">" ID, anyway
* the code handling the blocking clients will have to update the
* ID later in order to match the changing consumer group last ID. */
ids[id_idx].ms = UINT64_MAX;
ids[id_idx].seq = UINT64_MAX;
continue;
}
if (streamParseIDOrReply(c,c->argv[i],ids+id_idx,0) != C_OK)
@ -1365,9 +1396,36 @@ void xreadCommand(client *c) {
if (o == NULL) continue;
stream *s = o->ptr;
streamID *gt = ids+i; /* ID must be greater than this. */
if (s->last_id.ms > gt->ms ||
(s->last_id.ms == gt->ms && s->last_id.seq > gt->seq))
{
int serve_synchronously = 0;
/* Check if there are the conditions to serve the client synchronously. */
if (groups) {
/* If the consumer is blocked on a group, we always serve it
* synchronously (serving its local history) if the ID specified
* was not the special ">" ID. */
if (gt->ms != UINT64_MAX ||
gt->seq != UINT64_MAX)
{
serve_synchronously = 1;
} else {
/* We also want to serve a consumer in a consumer group
* synchronously in case the group top item delivered is smaller
* than what the stream has inside. */
streamID *last = &groups[i]->last_id;
if (streamCompareID(&s->last_id, last) > 0) {
serve_synchronously = 1;
*gt = *last;
}
}
} else {
/* For consumers without a group, we serve synchronously if we can
* actually provide at least one item from the stream. */
if (streamCompareID(&s->last_id, gt) > 0) {
serve_synchronously = 1;
}
}
if (serve_synchronously) {
arraylen++;
if (arraylen == 1) arraylen_ptr = addDeferredMultiBulkLength(c);
/* streamReplyWithRange() handles the 'start' ID as inclusive,
@ -1421,6 +1479,7 @@ void xreadCommand(client *c) {
incrRefCount(consumername);
c->bpop.xread_group = groupname;
c->bpop.xread_consumer = consumername;
c->bpop.xread_group_noack = noack;
} else {
c->bpop.xread_group = NULL;
c->bpop.xread_consumer = NULL;
@ -1566,7 +1625,7 @@ void xgroupCommand(client *c) {
"CREATE <key> <groupname> <id or $> -- Create a new consumer group.",
"SETID <key> <groupname> <id or $> -- Set the current group ID.",
"DESTROY <key> <groupname> -- Remove the specified group.",
"DELCONSUMER <key> <groupname> <consumer> -- Remove the specified conusmer.",
"DELCONSUMER <key> <groupname> <consumer> -- Remove the specified consumer.",
"HELP -- Prints this help.",
NULL
};
@ -1645,7 +1704,7 @@ NULL
} else if (!strcasecmp(opt,"HELP")) {
addReplyHelp(c, help);
} else {
addReply(c,shared.syntaxerr);
addReplySubcommandSyntaxError(c);
}
}
@ -2062,7 +2121,7 @@ void xclaimCommand(client *c) {
/* XDEL <key> [<ID1> <ID2> ... <IDN>]
*
* Removes the specified entries from the stream. Returns the number
* of items actaully deleted, that may be different from the number
* of items actually deleted, that may be different from the number
* of IDs passed in case certain IDs do not exist. */
void xdelCommand(client *c) {
robj *o;
@ -2073,21 +2132,25 @@ void xdelCommand(client *c) {
/* We need to sanity check the IDs passed to start. Even if not
* a big issue, it is not great that the command is only partially
* executed becuase at some point an invalid ID is parsed. */
* executed because at some point an invalid ID is parsed. */
streamID id;
for (int j = 2; j < c->argc; j++) {
if (streamParseIDOrReply(c,c->argv[j],&id,0) != C_OK) return;
}
/* Actaully apply the command. */
/* Actually apply the command. */
int deleted = 0;
for (int j = 2; j < c->argc; j++) {
streamParseIDOrReply(c,c->argv[j],&id,0); /* Retval already checked. */
deleted += streamDeleteItem(s,&id);
}
signalModifiedKey(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STREAM,"xdel",c->argv[1],c->db->id);
server.dirty += deleted;
/* Propagate the write if needed. */
if (deleted) {
signalModifiedKey(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STREAM,"xdel",c->argv[1],c->db->id);
server.dirty += deleted;
}
addReplyLongLong(c,deleted);
}
@ -2236,18 +2299,20 @@ NULL
raxSeek(&ri,"^",NULL,0);
while(raxNext(&ri)) {
streamCG *cg = ri.data;
addReplyMultiBulkLen(c,6);
addReplyMultiBulkLen(c,8);
addReplyStatus(c,"name");
addReplyBulkCBuffer(c,ri.key,ri.key_len);
addReplyStatus(c,"consumers");
addReplyLongLong(c,raxSize(cg->consumers));
addReplyStatus(c,"pending");
addReplyLongLong(c,raxSize(cg->pel));
addReplyStatus(c,"last-delivered-id");
addReplyStreamID(c,&cg->last_id);
}
raxStop(&ri);
} else if (!strcasecmp(opt,"STREAM") && c->argc == 3) {
/* XINFO STREAM <key> (or the alias XINFO <key>). */
addReplyMultiBulkLen(c,12);
addReplyMultiBulkLen(c,14);
addReplyStatus(c,"length");
addReplyLongLong(c,s->length);
addReplyStatus(c,"radix-tree-keys");
@ -2256,6 +2321,8 @@ NULL
addReplyLongLong(c,s->rax->numnodes);
addReplyStatus(c,"groups");
addReplyLongLong(c,s->cgroups ? raxSize(s->cgroups) : 0);
addReplyStatus(c,"last-generated-id");
addReplyStreamID(c,&s->last_id);
/* To emit the first/last entry we us the streamReplyWithRange()
* API. */
@ -2272,7 +2339,7 @@ NULL
STREAM_RWR_RAWENTRIES,NULL);
if (!count) addReply(c,shared.nullbulk);
} else {
addReplyError(c,"syntax error, try 'XINFO HELP'");
addReplySubcommandSyntaxError(c);
}
}

View File

@ -361,7 +361,7 @@ void incrDecrCommand(client *c, long long incr) {
new = o;
o->ptr = (void*)((long)value);
} else {
new = createStringObjectFromLongLong(value);
new = createStringObjectFromLongLongForValue(value);
if (o) {
dbOverwrite(c->db,c->argv[1],new);
} else {

View File

@ -507,7 +507,7 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) {
* + means the max string possible
*
* If the string is valid the *dest pointer is set to the redis object
* that will be used for the comparision, and ex will be set to 0 or 1
* that will be used for the comparison, and ex will be set to 0 or 1
* respectively if the item is exclusive or inclusive. C_OK will be
* returned.
*

View File

@ -451,7 +451,7 @@ int string2ld(const char *s, size_t slen, long double *dp) {
/* Convert a double to a string representation. Returns the number of bytes
* required. The representation should always be parsable by strtod(3).
* This function does not support human-friendly formatting like ld2string
* does. It is intented mainly to be used inside t_zset.c when writing scores
* does. It is intended mainly to be used inside t_zset.c when writing scores
* into a ziplist representing a sorted set. */
int d2string(char *buf, size_t len, double value) {
if (isnan(value)) {

View File

@ -269,7 +269,7 @@
* Note that this is not how the data is actually encoded, is just what we
* get filled by a function in order to operate more easily. */
typedef struct zlentry {
unsigned int prevrawlensize; /* Bytes used to encode the previos entry len*/
unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
unsigned int prevrawlen; /* Previous entry len. */
unsigned int lensize; /* Bytes used to encode this entry type/len.
For example strings have a 1, 2 or 5 bytes
@ -431,7 +431,7 @@ unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
/* Return the length of the previous element, and the number of bytes that
* are used in order to encode the previous element length.
* 'ptr' must point to the prevlen prefix of an entry (that encodes the
* length of the previos entry in order to navigate the elements backward).
* length of the previous entry in order to navigate the elements backward).
* The length of the previous entry is stored in 'prevlen', the number of
* bytes needed to encode the previous entry length are stored in
* 'prevlensize'. */

View File

@ -165,7 +165,7 @@ void *zrealloc(void *ptr, size_t size) {
*((size_t*)newptr) = size;
update_zmalloc_stat_free(oldsize);
update_zmalloc_stat_alloc(size);
update_zmalloc_stat_alloc(size+PREFIX_SIZE);
return (char*)newptr+PREFIX_SIZE;
#endif
}
@ -182,6 +182,9 @@ size_t zmalloc_size(void *ptr) {
if (size&(sizeof(long)-1)) size += sizeof(long)-(size&(sizeof(long)-1));
return size+PREFIX_SIZE;
}
size_t zmalloc_usable(void *ptr) {
return zmalloc_usable(ptr)-PREFIX_SIZE;
}
#endif
void zfree(void *ptr) {
@ -380,7 +383,7 @@ size_t zmalloc_get_private_dirty(long pid) {
}
/* Returns the size of physical memory (RAM) in bytes.
* It looks ugly, but this is the cleanest way to achive cross platform results.
* It looks ugly, but this is the cleanest way to achieve cross platform results.
* Cleaned up from:
*
* http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system

View File

@ -63,6 +63,11 @@
#ifndef ZMALLOC_LIB
#define ZMALLOC_LIB "libc"
#ifdef __GLIBC__
#include <malloc.h>
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) malloc_usable_size(p)
#endif
#endif
/* We can enable the Redis defrag capabilities only if we are using Jemalloc
@ -93,6 +98,9 @@ void *zmalloc_no_tcache(size_t size);
#ifndef HAVE_MALLOC_SIZE
size_t zmalloc_size(void *ptr);
size_t zmalloc_usable(void *ptr);
#else
#define zmalloc_usable(p) zmalloc_size(p)
#endif
#endif /* __ZMALLOC_H */

View File

@ -92,3 +92,80 @@ test "Node #10 should eventually replicate node #5" {
fail "#10 didn't became slave of #5"
}
}
source "../tests/includes/init-tests.tcl"
# Create a cluster with 3 master and 15 slaves, so that we have 5
# slaves for eatch master.
test "Create a 3 nodes cluster" {
create_cluster 3 15
}
test "Cluster is up" {
assert_cluster_state ok
}
test "The first master has actually 5 slaves" {
assert {[llength [lindex [R 0 role] 2]] == 5}
}
test {Slaves of #0 are instance #3, #6, #9, #12 and #15 as expected} {
set port0 [get_instance_attrib redis 0 port]
assert {[lindex [R 3 role] 2] == $port0}
assert {[lindex [R 6 role] 2] == $port0}
assert {[lindex [R 9 role] 2] == $port0}
assert {[lindex [R 12 role] 2] == $port0}
assert {[lindex [R 15 role] 2] == $port0}
}
test {Instance #3, #6, #9, #12 and #15 synced with the master} {
wait_for_condition 1000 50 {
[RI 3 master_link_status] eq {up} &&
[RI 6 master_link_status] eq {up} &&
[RI 9 master_link_status] eq {up} &&
[RI 12 master_link_status] eq {up} &&
[RI 15 master_link_status] eq {up}
} else {
fail "Instance #3 or #6 or #9 or #12 or #15 master link status is not up"
}
}
proc master_detected {instances} {
foreach instance [dict keys $instances] {
if {[RI $instance role] eq {master}} {
return true
}
}
return false
}
test "New Master down consecutively" {
set instances [dict create 0 1 3 1 6 1 9 1 12 1 15 1]
set loops [expr {[dict size $instances]-1}]
for {set i 0} {$i < $loops} {incr i} {
set master_id -1
foreach instance [dict keys $instances] {
if {[RI $instance role] eq {master}} {
set master_id $instance
break;
}
}
if {$master_id eq -1} {
fail "no master detected, #loop $i"
}
set instances [dict remove $instances $master_id]
kill_instance redis $master_id
wait_for_condition 1000 50 {
[master_detected $instances]
} else {
failover "No failover detected when master $master_id fails"
}
assert_cluster_state ok
}
}

View File

@ -39,6 +39,25 @@ start_server [list overrides [list "dir" $server_path]] {
} {0000000000000000000000000000000000000000}
}
start_server [list overrides [list "dir" $server_path]] {
test {Test RDB stream encoding} {
for {set j 0} {$j < 1000} {incr j} {
if {rand() < 0.9} {
r xadd stream * foo $j
} else {
r xadd stream * bar $j
}
}
r xgroup create stream mygroup 0
r xreadgroup GROUP mygroup Alice COUNT 1 STREAMS stream >
set digest [r debug digest]
r debug reload
set newdigest [r debug digest]
assert {$digest eq $newdigest}
r del stream
}
}
# Helper function to start a server and kill it, just to check the error
# logged.
set defaults {}

View File

@ -11,7 +11,7 @@ proc stop_bg_complex_data {handle} {
# partial resyncs attempts, all this while flooding the master with
# write queries.
#
# You can specifiy backlog size, ttl, delay before reconnection, test duration
# You can specify backlog size, ttl, delay before reconnection, test duration
# in seconds, and an additional condition to verify at the end.
#
# If reconnect is > 0, the test actually try to break the connection and

View File

@ -66,3 +66,13 @@ test "SDOWN is triggered by misconfigured instance repling with errors" {
R 0 bgsave
ensure_master_up
}
# We use this test setup to also test command renaming, as a side
# effect of the master going down if we send PONG instead of PING
test "SDOWN is triggered if we rename PING to PONG" {
ensure_master_up
S 4 SENTINEL SET mymaster rename-command PING PONG
ensure_master_down
S 4 SENTINEL SET mymaster rename-command PING PING
ensure_master_up
}

View File

@ -276,6 +276,12 @@ proc start_server {options {code undefined}} {
error_and_quit $config_file $line
}
if {$::wait_server} {
set msg "server started PID: [dict get $srv "pid"]. press any key to continue..."
puts $msg
read stdin 1
}
while 1 {
# check that the server actually started and is ready for connections
if {[exec grep -i "Ready to accept" | wc -l < $stdout] > 0} {

View File

@ -375,3 +375,19 @@ proc start_write_load {host port seconds} {
proc stop_write_load {handle} {
catch {exec /bin/kill -9 $handle}
}
proc K { x y } { set x }
# Shuffle a list. From Tcl wiki. Originally from Steve Cohen that improved
# other versions. Code should be under public domain.
proc lshuffle {list} {
set n [llength $list]
while {$n>0} {
set j [expr {int(rand()*$n)}]
lappend slist [lindex $list $j]
incr n -1
set temp [lindex $list $n]
set list [lreplace [K $list [set list {}]] $j $j $temp]
}
return $slist
}

View File

@ -61,6 +61,7 @@ set ::all_tests {
unit/hyperloglog
unit/lazyfree
unit/wait
unit/pendingquerybuf
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0
@ -82,6 +83,8 @@ set ::force_failure 0
set ::timeout 600; # 10 minutes without progresses will quit the test.
set ::last_progress [clock seconds]
set ::active_servers {} ; # Pids of active Redis instances.
set ::dont_clean 0
set ::wait_server 0
# Set to 1 when we are running in client mode. The Redis test uses a
# server-client model to run tests simultaneously. The server instance
@ -175,6 +178,9 @@ proc s {args} {
}
proc cleanup {} {
if {$::dont_clean} {
return
}
if {!$::quiet} {puts -nonewline "Cleanup: may take some time... "}
flush stdout
catch {exec rm -rf {*}[glob tests/tmp/redis.conf.*]}
@ -224,6 +230,7 @@ proc test_server_cron {} {
if {$elapsed > $::timeout} {
set err "\[[colorstr red TIMEOUT]\]: clients state report follows."
puts $err
lappend ::failed_tests $err
show_clients_state
kill_clients
force_kill_all_servers
@ -410,6 +417,8 @@ proc print_help_screen {} {
"--clients <num> Number of test clients (default 16)."
"--timeout <sec> Test timeout in seconds (default 10 min)."
"--force-failure Force the execution of a test that always fails."
"--dont-clean don't delete redis log files after the run"
"--wait-server wait after server is started (so that you can attach a debugger)"
"--help Print this help screen."
} "\n"]
}
@ -463,6 +472,10 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
} elseif {$opt eq {--clients}} {
set ::numclients $arg
incr j
} elseif {$opt eq {--dont-clean}} {
set ::dont_clean 1
} elseif {$opt eq {--wait-server}} {
set ::wait_server 1
} elseif {$opt eq {--timeout}} {
set ::timeout $arg
incr j

View File

@ -25,6 +25,39 @@ start_server {tags {"dump"}} {
assert {$ttl >= (2569591501-3000) && $ttl <= 2569591501}
r get foo
} {bar}
test {RESTORE can set an absolute expire} {
r set foo bar
set encoded [r dump foo]
r del foo
set now [clock milliseconds]
r restore foo [expr $now+3000] $encoded absttl
set ttl [r pttl foo]
assert {$ttl >= 2998 && $ttl <= 3000}
r get foo
} {bar}
test {RESTORE can set LRU} {
r set foo bar
set encoded [r dump foo]
r del foo
r config set maxmemory-policy allkeys-lru
r restore foo 0 $encoded idletime 1000
set idle [r object idletime foo]
assert {$idle >= 1000 && $idle <= 1002}
r get foo
} {bar}
test {RESTORE can set LFU} {
r set foo bar
set encoded [r dump foo]
r del foo
r config set maxmemory-policy allkeys-lfu
r restore foo 0 $encoded freq 100
set freq [r object freq foo]
assert {$freq == 100}
r get foo
} {bar}
test {RESTORE returns an error of the key already exists} {
r set foo bar
@ -246,7 +279,7 @@ start_server {tags {"dump"}} {
set e
} {*empty string*}
test {MIGRATE with mutliple keys migrate just existing ones} {
test {MIGRATE with multiple keys migrate just existing ones} {
set first [srv 0 client]
r set key1 "v1"
r set key2 "v2"

View File

@ -121,7 +121,7 @@ start_server {tags {"expire"}} {
list $a $b
} {somevalue {}}
test {TTL returns tiem to live in seconds} {
test {TTL returns time to live in seconds} {
r del x
r setex x 10 somevalue
set ttl [r ttl x]

View File

@ -142,3 +142,95 @@ start_server {tags {"maxmemory"}} {
}
}
}
proc test_slave_buffers {cmd_count payload_len limit_memory pipeline} {
start_server {tags {"maxmemory"}} {
start_server {} {
set slave [srv 0 client]
set slave_host [srv 0 host]
set slave_port [srv 0 port]
set master [srv -1 client]
set master_host [srv -1 host]
set master_port [srv -1 port]
# add 100 keys of 100k (10MB total)
for {set j 0} {$j < 100} {incr j} {
$master setrange "key:$j" 100000 asdf
}
$master config set maxmemory-policy allkeys-random
$master config set client-output-buffer-limit "slave 100000000 100000000 60"
$master config set repl-backlog-size [expr {10*1024}]
$slave slaveof $master_host $master_port
wait_for_condition 50 100 {
[s 0 master_link_status] eq {up}
} else {
fail "Replication not started."
}
# measure used memory after the slave connected and set maxmemory
set orig_used [s -1 used_memory]
set orig_client_buf [s -1 mem_clients_normal]
set orig_mem_not_counted_for_evict [s -1 mem_not_counted_for_evict]
set orig_used_no_repl [expr {$orig_used - $orig_mem_not_counted_for_evict}]
set limit [expr {$orig_used - $orig_mem_not_counted_for_evict + 20*1024}]
if {$limit_memory==1} {
$master config set maxmemory $limit
}
# put the slave to sleep
set rd_slave [redis_deferring_client]
$rd_slave debug sleep 60
# send some 10mb woth of commands that don't increase the memory usage
if {$pipeline == 1} {
set rd_master [redis_deferring_client -1]
for {set k 0} {$k < $cmd_count} {incr k} {
$rd_master setrange key:0 0 [string repeat A $payload_len]
}
for {set k 0} {$k < $cmd_count} {incr k} {
#$rd_master read
}
} else {
for {set k 0} {$k < $cmd_count} {incr k} {
$master setrange key:0 0 [string repeat A $payload_len]
}
}
set new_used [s -1 used_memory]
set slave_buf [s -1 mem_clients_slaves]
set client_buf [s -1 mem_clients_normal]
set mem_not_counted_for_evict [s -1 mem_not_counted_for_evict]
set used_no_repl [expr {$new_used - $mem_not_counted_for_evict}]
set delta [expr {($used_no_repl - $client_buf) - ($orig_used_no_repl - $orig_client_buf)}]
assert {[$master dbsize] == 100}
assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers
assert {$delta < 50*1024 && $delta > -50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
$master client kill type slave
set killed_used [s -1 used_memory]
set killed_slave_buf [s -1 mem_clients_slaves]
set killed_mem_not_counted_for_evict [s -1 mem_not_counted_for_evict]
set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}]
set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}]
assert {$killed_slave_buf == 0}
assert {$delta_no_repl > -50*1024 && $delta_no_repl < 50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
}
}
}
test {slave buffer are counted correctly} {
# we wanna use many small commands, and we don't wanna wait long
# so we need to use a pipeline (redis_deferring_client)
# that may cause query buffer to fill and induce eviction, so we disable it
test_slave_buffers 1000000 10 0 1
}
test {slave buffer don't induce eviction} {
# test again with fewer (and bigger) commands without pipeline, but with eviction
test_slave_buffers 100000 100 1 0
}

View File

@ -41,7 +41,7 @@ start_server {tags {"defrag"}} {
test "Active defrag" {
r config set activedefrag no
r config set active-defrag-threshold-lower 5
r config set active-defrag-cycle-min 25
r config set active-defrag-cycle-min 65
r config set active-defrag-cycle-max 75
r config set active-defrag-ignore-bytes 2mb
r config set maxmemory 100mb
@ -66,9 +66,10 @@ start_server {tags {"defrag"}} {
}
# Wait for the active defrag to stop working.
wait_for_condition 100 100 {
wait_for_condition 150 100 {
[s active_defrag_running] eq 0
} else {
after 120 ;# serverCron only updates the info once in 100ms
puts [r info memory]
puts [r memory malloc-stats]
fail "defrag didn't stop."
@ -97,10 +98,15 @@ start_server {tags {"defrag"}} {
r config set active-defrag-ignore-bytes 2mb
r config set maxmemory 0
r config set list-max-ziplist-size 5 ;# list of 10k items will have 2000 quicklist nodes
r config set stream-node-max-entries 5
r hmset hash h1 v1 h2 v2 h3 v3
r lpush list a b c d
r zadd zset 0 a 1 b 2 c 3 d
r sadd set a b c d
r xadd stream * item 1 value a
r xadd stream * item 2 value b
r xgroup create stream mygroup 0
r xreadgroup GROUP mygroup Alice COUNT 1 STREAMS stream >
# create big keys with 10k items
set rd [redis_deferring_client]
@ -109,8 +115,9 @@ start_server {tags {"defrag"}} {
$rd lpush biglist [concat "asdfasdfasdf" $j]
$rd zadd bigzset $j [concat "asdfasdfasdf" $j]
$rd sadd bigset [concat "asdfasdfasdf" $j]
$rd xadd bigstream * item 1 value a
}
for {set j 0} {$j < 40000} {incr j} {
for {set j 0} {$j < 50000} {incr j} {
$rd read ; # Discard replies
}
@ -134,7 +141,7 @@ start_server {tags {"defrag"}} {
for {set j 0} {$j < 500000} {incr j} {
$rd read ; # Discard replies
}
assert {[r dbsize] == 500008}
assert {[r dbsize] == 500010}
# create some fragmentation
for {set j 0} {$j < 500000} {incr j 2} {
@ -143,7 +150,7 @@ start_server {tags {"defrag"}} {
for {set j 0} {$j < 500000} {incr j 2} {
$rd read ; # Discard replies
}
assert {[r dbsize] == 250008}
assert {[r dbsize] == 250010}
# start defrag
after 120 ;# serverCron only updates the info once in 100ms
@ -155,6 +162,7 @@ start_server {tags {"defrag"}} {
r config set latency-monitor-threshold 5
r latency reset
set digest [r debug digest]
catch {r config set activedefrag yes} e
if {![string match {DISABLED*} $e]} {
# wait for the active defrag to start working (decision once a second)
@ -168,6 +176,7 @@ start_server {tags {"defrag"}} {
wait_for_condition 500 100 {
[s active_defrag_running] eq 0
} else {
after 120 ;# serverCron only updates the info once in 100ms
puts [r info memory]
puts [r memory malloc-stats]
fail "defrag didn't stop."
@ -193,9 +202,11 @@ start_server {tags {"defrag"}} {
# due to high fragmentation, 10hz, and active-defrag-cycle-max set to 75,
# we expect max latency to be not much higher than 75ms
assert {$max_latency <= 80}
} else {
set _ ""
}
} {}
# verify the data isn't corrupted or changed
set newdigest [r debug digest]
assert {$digest eq $newdigest}
r save ;# saving an rdb iterates over all the data / pointers
} {OK}
}
}

View File

@ -0,0 +1,35 @@
proc info_memory {r property} {
if {[regexp "\r\n$property:(.*?)\r\n" [{*}$r info memory] _ value]} {
set _ $value
}
}
proc prepare_value {size} {
set _v "c"
for {set i 1} {$i < $size} {incr i} {
append _v 0
}
return $_v
}
start_server {tags {"wait"}} {
start_server {} {
set slave [srv 0 client]
set slave_host [srv 0 host]
set slave_port [srv 0 port]
set master [srv -1 client]
set master_host [srv -1 host]
set master_port [srv -1 port]
test "pending querybuf: check size of pending_querybuf after set a big value" {
$slave slaveof $master_host $master_port
set _v [prepare_value [expr 32*1024*1024]]
$master set key $_v
after 2000
set m_usedmemory [info_memory $master used_memory]
set s_usedmemory [info_memory $slave used_memory]
if { $s_usedmemory > $m_usedmemory + 10*1024*1024 } {
fail "the used_memory of slave is too larger than master.Master:$m_usedmemory Slave:$s_usedmemory"
}
}
}}

View File

@ -246,7 +246,7 @@ start_server {tags {"scan"}} {
array set found {}
# Populate the set
set numele [expr {100+[randomInt 1000]}]
set numele [expr {101+[randomInt 1000]}]
for {set j 0} {$j < $numele} {incr j} {
r sadd set $j
if {$j >= 100} {

View File

@ -517,7 +517,7 @@ start_server {tags {"scripting"}} {
# Note: keep this test at the end of this server stanza because it
# kills the server.
test {SHUTDOWN NOSAVE can kill a timedout script anyway} {
# The server sould be still unresponding to normal commands.
# The server could be still unresponding to normal commands.
catch {r ping} e
assert_match {BUSY*} $e
catch {r shutdown nosave}

View File

@ -78,4 +78,14 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} {
set e [lindex [r slowlog get] 0]
assert_equal {lastentry_client} [lindex $e 5]
}
test {SLOWLOG - can be disabled} {
r config set slowlog-log-slower-than 1
r slowlog reset
assert_equal [r slowlog len] 1
r config set slowlog-log-slower-than -1
r slowlog reset
r debug sleep 0.2
assert_equal [r slowlog len] 0
}
}

View File

@ -81,4 +81,19 @@ start_server {
# just ID2.
assert {[r XACK mystream mygroup $id1 $id2] eq 1}
}
test {PEL NACK reassignment after XGROUP SETID event} {
r del events
r xadd events * f1 v1
r xadd events * f1 v1
r xadd events * f1 v1
r xadd events * f1 v1
r xgroup create events g1 $
r xadd events * f1 v1
set c [llength [lindex [r xreadgroup group g1 c1 streams events >] 0 1]]
assert {$c == 1}
r xgroup setid events g1 -
set c [llength [lindex [r xreadgroup group g1 c2 streams events >] 0 1]]
assert {$c == 5}
}
}

View File

@ -234,6 +234,53 @@ start_server {
assert {[lindex $res 0 1 1 1] eq {field two}}
}
test {XDEL basic test} {
r del somestream
r xadd somestream * foo value0
set id [r xadd somestream * foo value1]
r xadd somestream * foo value2
r xdel somestream $id
assert {[r xlen somestream] == 2}
set result [r xrange somestream - +]
assert {[lindex $result 0 1 1] eq {value0}}
assert {[lindex $result 1 1 1] eq {value2}}
}
# Here the idea is to check the consistency of the stream data structure
# as we remove all the elements down to zero elements.
test {XDEL fuzz test} {
r del somestream
set ids {}
set x 0; # Length of the stream
while 1 {
lappend ids [r xadd somestream * item $x]
incr x
# Add enough elements to have a few radix tree nodes inside the stream.
if {[dict get [r xinfo stream somestream] radix-tree-keys] > 20} break
}
# Now remove all the elements till we reach an empty stream
# and after every deletion, check that the stream is sane enough
# to report the right number of elements with XRANGE: this will also
# force accessing the whole data structure to check sanity.
assert {[r xlen somestream] == $x}
# We want to remove elements in random order to really test the
# implementation in a better way.
set ids [lshuffle $ids]
foreach id $ids {
assert {[r xdel somestream $id] == 1}
incr x -1
assert {[r xlen somestream] == $x}
# The test would be too slow calling XRANGE for every iteration.
# Do it every 100 removal.
if {$x % 100 == 0} {
set res [r xrange somestream - +]
assert {[llength $res] == $x}
}
}
}
test {XRANGE fuzzing} {
set low_id [lindex $items 0 0]
set high_id [lindex $items end 0]

View File

@ -84,7 +84,7 @@ start_server {tags {"zset"}} {
set err
} {ERR*}
test "ZADD NX with non exisitng key" {
test "ZADD NX with non existing key" {
r del ztmp
r zadd ztmp nx 10 x 20 y 30 z
assert {[r zcard ztmp] == 3}

View File

@ -5,7 +5,7 @@ rehashing.c
Visually show buckets in the two hash tables between rehashings. Also stress
test getRandomKeys() implementation, that may actually disappear from
Redis soon, however visualizaiton some code is reusable in new bugs
Redis soon, however visualization some code is reusable in new bugs
investigation.
Compile with: