diff --git a/src/cluster.c b/src/cluster.c index 66fc301c..bbad47bd 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -40,6 +40,7 @@ #include #include #include +#include /* A global reference to myself is handy to make code more clear. * Myself always points to server.cluster->myself, that is, the clusterNode @@ -2136,8 +2137,9 @@ void clusterSendPing(clusterLink *link, int type) { * Since we have non-voting slaves that lower the probability of an entry * to feature our node, we set the number of entires per packet as * 10% of the total nodes we have. */ - wanted = freshnodes/10; + wanted = floor(dictSize(server.cluster->nodes)/10); if (wanted < 3) wanted = 3; + if (wanted > freshnodes) wanted = freshnodes; /* Compute the maxium totlen to allocate our buffer. We'll fix the totlen * later according to the number of gossip sections we really were able @@ -2156,7 +2158,7 @@ void clusterSendPing(clusterLink *link, int type) { clusterBuildMessageHdr(hdr,type); /* Populate the gossip fields */ - int maxiterations = wanted+10; + int maxiterations = wanted*3; while(freshnodes > 0 && gossipcount < wanted && maxiterations--) { dictEntry *de = dictGetRandomKey(server.cluster->nodes); clusterNode *this = dictGetVal(de); @@ -2167,6 +2169,11 @@ void clusterSendPing(clusterLink *link, int type) { * already, so we just gossip about other nodes. */ if (this == myself) continue; + /* Give a bias to FAIL/PFAIL nodes. */ + if (maxiterations > wanted*2 && + !(this->flags & (REDIS_NODE_PFAIL|REDIS_NODE_FAIL))) + continue; + /* In the gossip section don't include: * 1) Nodes in HANDSHAKE state. * 3) Nodes with the NOADDR flag set. diff --git a/utils/cluster_fail_time.tcl b/utils/cluster_fail_time.tcl new file mode 100644 index 00000000..87399495 --- /dev/null +++ b/utils/cluster_fail_time.tcl @@ -0,0 +1,50 @@ +# This simple script is used in order to estimate the average PFAIL->FAIL +# state switch after a failure. + +set ::sleep_time 10 ; # How much to sleep to trigger PFAIL. +set ::fail_port 30016 ; # Node to put in sleep. +set ::other_port 30001 ; # Node to use to monitor the flag switch. + +proc avg vector { + set sum 0.0 + foreach x $vector { + set sum [expr {$sum+$x}] + } + expr {$sum/[llength $vector]} +} + +set samples {} +while 1 { + exec redis-cli -p $::fail_port debug sleep $::sleep_time > /dev/null & + + # Wait for fail? to appear. + while 1 { + set output [exec redis-cli -p $::other_port cluster nodes] + if {[string match {*fail\?*} $output]} break + after 100 + } + + puts "FAIL?" + set start [clock milliseconds] + + # Wait for fail? to disappear. + while 1 { + set output [exec redis-cli -p $::other_port cluster nodes] + if {![string match {*fail\?*} $output]} break + after 100 + } + + puts "FAIL" + set now [clock milliseconds] + set elapsed [expr {$now-$start}] + puts $elapsed + lappend samples $elapsed + + puts "AVG([llength $samples]): [avg $samples]" + + # Wait for the instance to be available again. + exec redis-cli -p $::fail_port ping + + # Wait for the fail flag to be cleared. + after 2000 +} diff --git a/utils/create-cluster/README b/utils/create-cluster/README index f3a3f088..1f43748e 100644 --- a/utils/create-cluster/README +++ b/utils/create-cluster/README @@ -24,4 +24,4 @@ In order to stop a cluster: 1. Use "./craete-cluster stop" to stop all the instances. After you stopped the instances you can use "./create-cluster start" to restart them if you change ideas. 2. Use "./create-cluster clean" to remove all the AOF / log files to restat with a clean environment. -It is currently hardcoded that you start a cluster where each master has one slave, since the script is pretty basic. +Use the command "./create-cluster help" to get the full list of features. diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster index 76f61091..efb3135d 100755 --- a/utils/create-cluster/create-cluster +++ b/utils/create-cluster/create-cluster @@ -1,8 +1,21 @@ #!/bin/bash +# Settings PORT=30000 -ENDPORT=30006 -TIMEOUT=15000 +TIMEOUT=2000 +NODES=6 +REPLICAS=1 + +# You may want to put the above config parameters into config.sh in order to +# override the defaults without modifying this script. + +if [ -a config.sh ] +then + source "config.sh" +fi + +# Computed vars +ENDPORT=$((PORT+NODES)) if [ "$1" == "start" ] then @@ -21,7 +34,7 @@ then PORT=$((PORT+1)) HOSTS="$HOSTS 127.0.0.1:$PORT" done - ../../src/redis-trib.rb create --replicas 1 $HOSTS + ../../src/redis-trib.rb create --replicas $REPLICAS $HOSTS exit 0 fi @@ -35,22 +48,31 @@ then exit 0 fi -if [ "$1" == "join" ] +if [ "$1" == "watch" ] +then + PORT=$((PORT+1)) + while [ 1 ]; do + clear + date + redis-cli -p $PORT cluster nodes | head -30 + sleep 1 + done + exit 0 +fi + +if [ "$1" == "tail" ] +then + INSTANCE=$2 + PORT=$((PORT+INSTANCE)) + tail -f ${PORT}.log + exit 0 +fi + +if [ "$1" == "call" ] then while [ $((PORT < ENDPORT)) != "0" ]; do PORT=$((PORT+1)) - echo "Joining $PORT" - redis-cli -p $PORT CLUSTER MEET 127.0.0.1 10002 - done - - echo "Waiting 5 seconds" - sleep 5 - - PORT=30000 - while [ $((PORT < ENDPORT)) != "0" ]; do - PORT=$((PORT+1)) - echo "Replicate $PORT" - redis-cli -p $PORT CLUSTER REPLICATE $2 + ../../src/redis-cli -p $PORT $2 $3 $4 $5 $6 $7 $8 $9 done exit 0 fi @@ -64,4 +86,10 @@ then exit 0 fi -echo "Usage: $0 [start|create|stop|join|clean]" +echo "Usage: $0 [start|create|stop|watch|tail|clean]" +echo "start -- Launch Redis Cluster instances." +echo "create -- Create a cluster using redis-trib create." +echo "stop -- Stop Redis Cluster instances." +echo "watch -- Show CLUSTER NODES output (first 30 lines) of first node." +echo "tail -- Run tail -f of instance at base port + ID." +echo "clean -- Remove all instances data, logs, configs."