From 12dcd2d7acc4ecbfec7eacfefc7ead225f8705af Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Jan 2015 16:35:00 +0100 Subject: [PATCH 01/17] Redis 2.9.102 (3.0.0 Release Candidate 2). --- 00-RELEASENOTES | 32 ++++++++++++++++++++++++++++++++ src/version.h | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/00-RELEASENOTES b/00-RELEASENOTES index 1966716e..c855370a 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -12,6 +12,38 @@ HIGH: There is a critical bug that may affect a subset of users. Upgrade! CRITICAL: There is a critical bug affecting MOST USERS. Upgrade ASAP. -------------------------------------------------------------------------------- +--[ Redis 3.0.0 RC2 (version 2.9.102) ] Release date: 13 jan 2014 + +Upgrade urgency: LOW. + +This is the second release candidate of Redis Cluster. The major changes +are back porting of things implemented into the unstable branch while +this was still possible (with the new development model adopted only +bug fixes will be merged in the future). + +RC2 also fixes a few Redis Cluster non critical bugs. + +>> General changes + +* [FIX] A number of minor bug fixes. + +* [NEW] Diskless replication backportede. +* [NEW] Lua bitops and updated cmsgpack backported. +* [NEW] Transparent Huge Pages warnings and reporting backported. + +>> Cluster changes + +* [FIX] Fix PUBLISH cluster bus message count field. +* [FIX] It is no longer possible to write outside node hash slots using Lua. +* [FIX] Valgrind warnings (no actual bugs). +* [FIX] Less strict in acceptiong myself->ip if it's not populated. + +* [NEW] Better testing of Lua scripts. + +>> Sentinel changes + +No changes to Sentinel. + --[ Redis 3.0.0 RC1 (version 2.9.101) ] Release date: 9 oct 2014 This is the first release candidate of Redis Cluster. diff --git a/src/version.h b/src/version.h index 9928573c..05576be0 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define REDIS_VERSION "2.9.101" +#define REDIS_VERSION "2.9.102" From 6fe6cbd0ef4ea025c7c5adf23b17a4dc75f9ebc6 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Jan 2015 18:09:40 +0100 Subject: [PATCH 02/17] RC2 release date fixed in Changelog. --- 00-RELEASENOTES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/00-RELEASENOTES b/00-RELEASENOTES index c855370a..fac2b1e3 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -12,7 +12,7 @@ HIGH: There is a critical bug that may affect a subset of users. Upgrade! CRITICAL: There is a critical bug affecting MOST USERS. Upgrade ASAP. -------------------------------------------------------------------------------- ---[ Redis 3.0.0 RC2 (version 2.9.102) ] Release date: 13 jan 2014 +--[ Redis 3.0.0 RC2 (version 2.9.102) ] Release date: 13 jan 2015 Upgrade urgency: LOW. From 23cc0a3f1c3a72abce4214e9a66ad2398557f57b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Jan 2015 16:39:38 +0100 Subject: [PATCH 03/17] AOF rewrite: set iterator var to NULL when freed. The cleanup code expects that if 'di' is not NULL, it is a valid iterator that should be freed. The result of this bug was a crash of the AOF rewriting process if an error occurred after the DBs data are written and the iterator is no longer valid. --- src/aof.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aof.c b/src/aof.c index 0af519bf..c6b84040 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1105,6 +1105,7 @@ int rewriteAppendOnlyFile(char *filename) { } } dictReleaseIterator(di); + di = NULL; } /* Do an initial slow fsync here while the parent is still sending From 83a6cc339862e6573f7f8a39b22f75ec17da160d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Jan 2015 18:01:28 +0100 Subject: [PATCH 04/17] Panic on recursive calls to luaRedisGenericCommand(). Related to issue #2302. --- src/scripting.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/scripting.c b/src/scripting.c index b6a333a4..4b6f47fd 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -214,11 +214,22 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { static int argv_size = 0; static robj *cached_objects[LUA_CMD_OBJCACHE_SIZE]; static size_t cached_objects_len[LUA_CMD_OBJCACHE_SIZE]; + static int inuse = 0; /* Recursive calls detection. */ + + /* By using Lua debug hooks it is possible to trigger a recursive call + * to luaRedisGenericCommand(), which normally should never happen. + * To make this function reentrant is futile and makes it slower, but + * we should at least detect such a misuse, and abort. */ + if (inuse) { + redisPanic("luaRedisGenericCommand() recursive call detected. Are you doing funny stuff with Lua debug hooks?"); + } + inuse++; /* Require at least one argument */ if (argc == 0) { luaPushError(lua, "Please specify at least one argument for redis.call()"); + inuse--; return 1; } @@ -273,6 +284,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { } luaPushError(lua, "Lua redis() command arguments must be strings or integers"); + inuse--; return 1; } @@ -426,8 +438,10 @@ cleanup: * return the plain error. */ lua_pushstring(lua,"err"); lua_gettable(lua,-2); + inuse--; return lua_error(lua); } + inuse--; return 1; } From 9b92edddfa9540c3192cbac03053fb8c623ecbae Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Jan 2015 23:13:47 +0100 Subject: [PATCH 05/17] luaRedisGenericCommand() recursion: just return an error. Instead of calling redisPanic() to abort the server. Related to issue #2302. --- src/scripting.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/scripting.c b/src/scripting.c index 4b6f47fd..313bc218 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -221,7 +221,9 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { * To make this function reentrant is futile and makes it slower, but * we should at least detect such a misuse, and abort. */ if (inuse) { - redisPanic("luaRedisGenericCommand() recursive call detected. Are you doing funny stuff with Lua debug hooks?"); + luaPushError(lua, + "luaRedisGenericCommand() recursive call detected. Are you doing funny stuff with Lua debug hooks?"); + return 1; } inuse++; From 26698af37dfe7352ad66bdeec5799e333647d67a Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Jan 2015 23:20:12 +0100 Subject: [PATCH 06/17] luaRedisGenericCommand(): log error at WARNING level when re-entered. Rationale is that when re-entering, it is likely due to Lua debugging hooks. Returning an error will be ignored in most cases, going totally unnoticed. With the log at least we leave a trace. Related to issue #2302. --- src/scripting.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/scripting.c b/src/scripting.c index 313bc218..c5dd4e71 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -221,8 +221,11 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { * To make this function reentrant is futile and makes it slower, but * we should at least detect such a misuse, and abort. */ if (inuse) { - luaPushError(lua, - "luaRedisGenericCommand() recursive call detected. Are you doing funny stuff with Lua debug hooks?"); + char *recursion_warning = + "luaRedisGenericCommand() recursive call detected. " + "Are you doing funny stuff with Lua debug hooks?"; + redisLog(REDIS_WARNING,"%s",recursion_warning); + luaPushError(lua,recursion_warning); return 1; } inuse++; From fa470fe78267d2c993e85ce56a59e84c76257231 Mon Sep 17 00:00:00 2001 From: Matt Stancliff Date: Fri, 9 Jan 2015 17:43:48 -0500 Subject: [PATCH 07/17] Tell sentinel/cluster tests to allow valgrind --- tests/instances.tcl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/instances.tcl b/tests/instances.tcl index 426508f3..b9eb4225 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -16,6 +16,7 @@ source ../support/server.tcl source ../support/test.tcl set ::verbose 0 +set ::valgrind 0 set ::pause_on_error 0 set ::simulate_error 0 set ::sentinel_instances {} @@ -65,7 +66,13 @@ proc spawn_instance {type base_port count {conf {}}} { } else { error "Unknown instance type." } - set pid [exec ../../../src/${prgname} $cfgfile &] + + if {$::valgrind} { + set pid [exec valgrind --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &] + } else { + set pid [exec ../../../src/${prgname} $cfgfile &] + } + lappend ::pids $pid # Check availability @@ -113,6 +120,8 @@ proc parse_options {} { set ::pause_on_error 1 } elseif {$opt eq "--fail"} { set ::simulate_error 1 + } elseif {$opt eq {--valgrind}} { + set ::valgrind 1 } elseif {$opt eq "--help"} { puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests." puts "\nOptions:" @@ -390,7 +399,13 @@ proc restart_instance {type id} { } else { set prgname redis-sentinel } - set pid [exec ../../../src/${prgname} $cfgfile &] + + if {$::valgrind} { + set pid [exec valgrind --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &] + } else { + set pid [exec ../../../src/${prgname} $cfgfile &] + } + set_instance_attrib $type $id pid $pid lappend ::pids $pid From 98f56f8f9ea8655e31797fbffb7f5419070a1d5f Mon Sep 17 00:00:00 2001 From: Matt Stancliff Date: Tue, 13 Jan 2015 11:15:30 -0500 Subject: [PATCH 08/17] Add --track-origins=yes to valgrind --- tests/instances.tcl | 4 ++-- tests/support/server.tcl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/instances.tcl b/tests/instances.tcl index b9eb4225..4e2f33df 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -68,7 +68,7 @@ proc spawn_instance {type base_port count {conf {}}} { } if {$::valgrind} { - set pid [exec valgrind --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &] + set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &] } else { set pid [exec ../../../src/${prgname} $cfgfile &] } @@ -401,7 +401,7 @@ proc restart_instance {type id} { } if {$::valgrind} { - set pid [exec valgrind --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &] + set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &] } else { set pid [exec ../../../src/${prgname} $cfgfile &] } diff --git a/tests/support/server.tcl b/tests/support/server.tcl index 67ee2452..317b40a8 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -207,7 +207,7 @@ proc start_server {options {code undefined}} { set stderr [format "%s/%s" [dict get $config "dir"] "stderr"] if {$::valgrind} { - set pid [exec valgrind --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &] + set pid [exec valgrind --track-origins=yes --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &] } else { set pid [exec src/redis-server $config_file > $stdout 2> $stderr &] } From 5130c2536b66402ca17b582d52960e11eac7dde7 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Jan 2015 15:55:53 +0100 Subject: [PATCH 09/17] Cluster: set the slaves->slaveof filed to NULL when master is freed. Related to issue #2289. --- src/cluster.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/cluster.c b/src/cluster.c index c3cf0602..84bf4fa8 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -821,6 +821,14 @@ int clusterCountNonFailingSlaves(clusterNode *n) { void freeClusterNode(clusterNode *n) { sds nodename; + int j; + + /* If the node is a master with associated slaves, we have to set + * all the slaves->slaveof fields to NULL (unknown). */ + if (nodeIsMaster(n)) { + for (j = 0; j < n->numslaves; j++) + n->slaves[j]->slaveof = NULL; + } nodename = sdsnewlen(n->name, REDIS_CLUSTER_NAMELEN); redisAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK); From 0a3edcbe51d02c59b0b29759a7d1671441c8abd3 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Jan 2015 16:03:43 +0100 Subject: [PATCH 10/17] Cluster: node deletion cleanup / centralization. --- src/cluster.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 84bf4fa8..d6ec2734 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -819,6 +819,7 @@ int clusterCountNonFailingSlaves(clusterNode *n) { return okslaves; } +/* Low level cleanup of the node structure. Only called by clusterDelNode(). */ void freeClusterNode(clusterNode *n) { sds nodename; int j; @@ -830,10 +831,15 @@ void freeClusterNode(clusterNode *n) { n->slaves[j]->slaveof = NULL; } + /* Remove this node from the list of slaves of its master. */ + if (nodeIsSlave(n) && n->slaveof) clusterNodeRemoveSlave(n->slaveof,n); + + /* Unlink from the set of nodes. */ nodename = sdsnewlen(n->name, REDIS_CLUSTER_NAMELEN); redisAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK); sdsfree(nodename); - if (n->slaveof) clusterNodeRemoveSlave(n->slaveof, n); + + /* Release link and associated data structures. */ if (n->link) freeClusterLink(n->link); listRelease(n->fail_reports); zfree(n); @@ -848,11 +854,16 @@ int clusterAddNode(clusterNode *node) { return (retval == DICT_OK) ? REDIS_OK : REDIS_ERR; } -/* Remove a node from the cluster: - * 1) Mark all the nodes handled by it as unassigned. - * 2) Remove all the failure reports sent by this node. - * 3) Free the node, that will in turn remove it from the hash table - * and from the list of slaves of its master, if it is a slave node. +/* Remove a node from the cluster. The functio performs the high level + * cleanup, calling freeClusterNode() for the low level cleanup. + * Here we do the following: + * + * 1) Mark all the slots handled by it as unassigned. + * 2) Remove all the failure reports sent by this node and referenced by + * other nodes. + * 3) Free the node with freeClusterNode() that will in turn remove it + * from the hash table and from the list of slaves of its master, if + * it is a slave node. */ void clusterDelNode(clusterNode *delnode) { int j; @@ -879,11 +890,7 @@ void clusterDelNode(clusterNode *delnode) { } dictReleaseIterator(di); - /* 3) Remove this node from its master's slaves if needed. */ - if (nodeIsSlave(delnode) && delnode->slaveof) - clusterNodeRemoveSlave(delnode->slaveof,delnode); - - /* 4) Free the node, unlinking it from the cluster. */ + /* 3) Free the node, unlinking it from the cluster. */ freeClusterNode(delnode); } @@ -1619,7 +1626,7 @@ int clusterProcessPacket(clusterLink *link) { } /* Free this node as we already have it. This will * cause the link to be freed as well. */ - freeClusterNode(link->node); + clusterDelNode(link->node); return 0; } @@ -2913,7 +2920,7 @@ void clusterCron(void) { /* A Node in HANDSHAKE state has a limited lifespan equal to the * configured node timeout. */ if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) { - freeClusterNode(node); + clusterDelNode(node); continue; } From 4688f60b4b78860b1390899ec63e42ac492dd7aa Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Jan 2015 16:13:30 +0100 Subject: [PATCH 11/17] Cluster/Sentinel test: pause on exceptions as well. --- tests/cluster/run.tcl | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cluster/run.tcl b/tests/cluster/run.tcl index 69a160c4..f764cea0 100644 --- a/tests/cluster/run.tcl +++ b/tests/cluster/run.tcl @@ -21,6 +21,7 @@ proc main {} { if {[catch main e]} { puts $::errorInfo + if {$::pause_on_error} pause_on_error cleanup exit 1 } From 707d97ac4fbea09b1e136808dac4803233fc3c53 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Jan 2015 16:18:34 +0100 Subject: [PATCH 12/17] Cluster/Sentinel test: also pause on abort_sentinel_test call. --- tests/instances.tcl | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/instances.tcl b/tests/instances.tcl index 4e2f33df..a68b79d1 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -105,6 +105,7 @@ proc cleanup {} { proc abort_sentinel_test msg { puts "WARNING: Aborting the test." puts ">>>>>>>> $msg" + if {$::pause_on_error} pause_on_error cleanup exit 1 } From c8f903747767eab362b3123a5c0cea6abfb1b5f3 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Jan 2015 16:46:51 +0100 Subject: [PATCH 13/17] Cluster test: wait for port to unbound in kill_instance. Otherwise kill_instance + restart_instance in short succession will still find the port busy and will fail. --- tests/instances.tcl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/instances.tcl b/tests/instances.tcl index a68b79d1..7d87cdf5 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -370,15 +370,31 @@ proc get_instance_id_by_port {type port} { # The instance can be restarted with restart-instance. proc kill_instance {type id} { set pid [get_instance_attrib $type $id pid] + set port [get_instance_attrib $type $id port] + if {$pid == -1} { error "You tried to kill $type $id twice." } + exec kill -9 $pid set_instance_attrib $type $id pid -1 set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance # Remove the PID from the list of pids to kill at exit. set ::pids [lsearch -all -inline -not -exact $::pids $pid] + + # Wait for the port it was using to be available again, so that's not + # an issue to start a new server ASAP with the same port. + set retry 10 + while {[incr retry -1]} { + set port_is_free [catch {set s [socket 127.0.01 $port]}] + if {$port_is_free} break + catch {close $s} + after 1000 + } + if {$retry == 0} { + error "Port $port does not return available after killing instance." + } } # Return true of the instance of the specified type/id is killed. From 4a36350d9f97dd579f44820724aabfc56e88a57a Mon Sep 17 00:00:00 2001 From: Matt Stancliff Date: Wed, 14 Jan 2015 11:21:50 -0500 Subject: [PATCH 14/17] Fix sending uninitialized bytes Fixes valgrind error: Syscall param write(buf) points to uninitialised byte(s) at 0x514C35D: ??? (syscall-template.S:81) by 0x456B81: clusterWriteHandler (cluster.c:1907) by 0x41D596: aeProcessEvents (ae.c:416) by 0x41D8EA: aeMain (ae.c:455) by 0x41A84B: main (redis.c:3832) Address 0x5f268e2 is 2,274 bytes inside a block of size 8,192 alloc'd at 0x4932D1: je_realloc (jemalloc.c:1297) by 0x428185: zrealloc (zmalloc.c:162) by 0x4269E0: sdsMakeRoomFor.part.0 (sds.c:142) by 0x426CD7: sdscatlen (sds.c:251) by 0x4579E7: clusterSendMessage (cluster.c:1995) by 0x45805A: clusterSendPing (cluster.c:2140) by 0x45BB03: clusterCron (cluster.c:2944) by 0x423344: serverCron (redis.c:1239) by 0x41D6CD: aeProcessEvents (ae.c:311) by 0x41D8EA: aeMain (ae.c:455) by 0x41A84B: main (redis.c:3832) Uninitialised value was created by a stack allocation at 0x457810: nodeUpdateAddressIfNeeded (cluster.c:1236) --- src/cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index d6ec2734..187fdfd8 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1249,7 +1249,7 @@ void nodeIp2String(char *buf, clusterLink *link) { * The function returns 0 if the node address is still the same, * otherwise 1 is returned. */ int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) { - char ip[REDIS_IP_STR_LEN]; + char ip[REDIS_IP_STR_LEN] = {0}; /* We don't proceed if the link is the same as the sender link, as this * function is designed to see if the node link is consistent with the From 97ffeb7c091bb5a2195473fdff5a65606666b14e Mon Sep 17 00:00:00 2001 From: Matt Stancliff Date: Wed, 14 Jan 2015 11:31:17 -0500 Subject: [PATCH 15/17] Fix cluster reset memory leak [maybe] Fixes valgrind errors: 32 bytes in 4 blocks are definitely lost in loss record 107 of 228 at 0x80EA447: je_malloc (jemalloc.c:944) by 0x806E59C: zrealloc (zmalloc.c:125) by 0x80A9AFC: clusterSetMaster (cluster.c:801) by 0x80AEDC9: clusterCommand (cluster.c:3994) by 0x80682A5: call (redis.c:2049) by 0x8068A20: processCommand (redis.c:2309) by 0x8076497: processInputBuffer (networking.c:1143) by 0x8073BAF: readQueryFromClient (networking.c:1208) by 0x8060E98: aeProcessEvents (ae.c:412) by 0x806123B: aeMain (ae.c:455) by 0x806C3DB: main (redis.c:3832) 64 bytes in 8 blocks are definitely lost in loss record 143 of 228 at 0x80EA447: je_malloc (jemalloc.c:944) by 0x806E59C: zrealloc (zmalloc.c:125) by 0x80AAB40: clusterProcessPacket (cluster.c:801) by 0x80A847F: clusterReadHandler (cluster.c:1975) by 0x30000FF: ??? 80 bytes in 10 blocks are definitely lost in loss record 148 of 228 at 0x80EA447: je_malloc (jemalloc.c:944) by 0x806E59C: zrealloc (zmalloc.c:125) by 0x80AAB40: clusterProcessPacket (cluster.c:801) by 0x80A847F: clusterReadHandler (cluster.c:1975) by 0x2FFFFFF: ??? --- src/cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cluster.c b/src/cluster.c index 187fdfd8..54663c49 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -842,6 +842,7 @@ void freeClusterNode(clusterNode *n) { /* Release link and associated data structures. */ if (n->link) freeClusterLink(n->link); listRelease(n->fail_reports); + zfree(n->slaves); zfree(n); } From 98faed3a3f57239e69460f3396d072418893dd43 Mon Sep 17 00:00:00 2001 From: Matt Stancliff Date: Wed, 14 Jan 2015 11:10:25 -0500 Subject: [PATCH 16/17] Fix potential invalid read past end of array If array has N elements, we can't read +1 if we are already at N. Also, we need to move elements by their storage size in the array, not just by individual bytes. --- src/cluster.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 54663c49..7e0b41b6 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -783,8 +783,11 @@ int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) { for (j = 0; j < master->numslaves; j++) { if (master->slaves[j] == slave) { - memmove(master->slaves+j,master->slaves+(j+1), - (master->numslaves-1)-j); + if ((j+1) < master->numslaves) { + int remaining_slaves = (master->numslaves - j) - 1; + memmove(master->slaves+j,master->slaves+(j+1), + (sizeof(*master->slaves) * remaining_slaves)); + } master->numslaves--; return REDIS_OK; } From ebb07a0b487641b8a208a836dee130a808ab19ce Mon Sep 17 00:00:00 2001 From: Matt Stancliff Date: Thu, 15 Jan 2015 14:20:59 -0500 Subject: [PATCH 17/17] Fix cluster migrate memory leak Fixes valgrind error: 48 bytes in 1 blocks are definitely lost in loss record 196 of 373 at 0x4910D3: je_malloc (jemalloc.c:944) by 0x42807D: zmalloc (zmalloc.c:125) by 0x41FA0D: dictGetIterator (dict.c:543) by 0x41FA48: dictGetSafeIterator (dict.c:555) by 0x459B73: clusterHandleSlaveMigration (cluster.c:2776) by 0x45BF27: clusterCron (cluster.c:3123) by 0x423344: serverCron (redis.c:1239) by 0x41D6CD: aeProcessEvents (ae.c:311) by 0x41D8EA: aeMain (ae.c:455) by 0x41A84B: main (redis.c:3832) --- src/cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cluster.c b/src/cluster.c index 7e0b41b6..ce544970 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -2803,6 +2803,7 @@ void clusterHandleSlaveMigration(int max_slaves) { } } } + dictReleaseIterator(di); /* Step 4: perform the migration if there is a target, and if I'm the * candidate. */