mirror of
https://github.com/fluencelabs/redis
synced 2025-06-13 09:11:20 +00:00
Cluster: ability to prevent slaves from failing over their masters.
This commit, in some parts derived from PR #3041 which is no longer possible to merge (because the user deleted the original branch), implements the ability of slaves to have a special configuration preventing that they try to start a failover when the master is failing. There are multiple reasons for wanting this, and the feautre was requested in issue #3021 time ago. The differences between this patch and the original PR are the following: 1. The flag is saved/loaded on the nodes configuration. 2. The 'myself' node is now flag-aware, the flag is updated as needed when the configuration is changed via CONFIG SET. 3. The flag name uses NOFAILOVER instead of NO_FAILOVER to be consistent with existing NOADDR. 4. The redis.conf documentation was rewritten. Thanks to @deep011 for the original patch.
This commit is contained in:
@ -201,6 +201,8 @@ int clusterLoadConfig(char *filename) {
|
||||
n->flags |= CLUSTER_NODE_HANDSHAKE;
|
||||
} else if (!strcasecmp(s,"noaddr")) {
|
||||
n->flags |= CLUSTER_NODE_NOADDR;
|
||||
} else if (!strcasecmp(s,"nofailover")) {
|
||||
n->flags |= CLUSTER_NODE_NOFAILOVER;
|
||||
} else if (!strcasecmp(s,"noflags")) {
|
||||
/* nothing to do */
|
||||
} else {
|
||||
@ -407,6 +409,22 @@ int clusterLockConfig(char *filename) {
|
||||
return C_OK;
|
||||
}
|
||||
|
||||
/* Some flags (currently just the NOFAILOVER flag) may need to be updated
|
||||
* in the "myself" node based on the current configuration of the node,
|
||||
* that may change at runtime via CONFIG SET. This function changes the
|
||||
* set of flags in myself->flags accordingly. */
|
||||
void clusterUpdateMyselfFlags(void) {
|
||||
int oldflags = myself->flags;
|
||||
int nofailover = server.cluster_slave_no_failover ?
|
||||
CLUSTER_NODE_NOFAILOVER : 0;
|
||||
myself->flags &= ~CLUSTER_NODE_NOFAILOVER;
|
||||
myself->flags |= nofailover;
|
||||
if (myself->flags != oldflags) {
|
||||
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
|
||||
CLUSTER_TODO_UPDATE_STATE);
|
||||
}
|
||||
}
|
||||
|
||||
void clusterInit(void) {
|
||||
int saveconf = 0;
|
||||
|
||||
@ -497,6 +515,7 @@ void clusterInit(void) {
|
||||
|
||||
server.cluster->mf_end = 0;
|
||||
resetManualFailover();
|
||||
clusterUpdateMyselfFlags();
|
||||
}
|
||||
|
||||
/* Reset a node performing a soft or hard reset:
|
||||
@ -1808,6 +1827,18 @@ int clusterProcessPacket(clusterLink *link) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy the CLUSTER_NODE_NOFAILOVER flag from what the sender
|
||||
* announced. This is a dynamic flag that we receive from the
|
||||
* sender, and the latest status must be trusted. We need it to
|
||||
* be propagated because the slave ranking used to understand the
|
||||
* delay of each slave in the voting process, needs to know
|
||||
* what are the instances really competing. */
|
||||
if (sender) {
|
||||
int nofailover = flags & CLUSTER_NODE_NOFAILOVER;
|
||||
sender->flags &= ~CLUSTER_NODE_NOFAILOVER;
|
||||
sender->flags |= nofailover;
|
||||
}
|
||||
|
||||
/* Update the node address if it changed. */
|
||||
if (sender && type == CLUSTERMSG_TYPE_PING &&
|
||||
!nodeInHandshake(sender) &&
|
||||
@ -2723,6 +2754,7 @@ int clusterGetSlaveRank(void) {
|
||||
myoffset = replicationGetSlaveOffset();
|
||||
for (j = 0; j < master->numslaves; j++)
|
||||
if (master->slaves[j] != myself &&
|
||||
!nodeCantFailover(master->slaves[j]) &&
|
||||
master->slaves[j]->repl_offset > myoffset) rank++;
|
||||
return rank;
|
||||
}
|
||||
@ -2860,10 +2892,13 @@ void clusterHandleSlaveFailover(void) {
|
||||
* of an automatic or manual failover:
|
||||
* 1) We are a slave.
|
||||
* 2) Our master is flagged as FAIL, or this is a manual failover.
|
||||
* 3) It is serving slots. */
|
||||
* 3) We don't have the no failover configuration set, and this is
|
||||
* not a manual failover.
|
||||
* 4) It is serving slots. */
|
||||
if (nodeIsMaster(myself) ||
|
||||
myself->slaveof == NULL ||
|
||||
(!nodeFailed(myself->slaveof) && !manual_failover) ||
|
||||
(server.cluster_slave_no_failover && !manual_failover) ||
|
||||
myself->slaveof->numslots == 0)
|
||||
{
|
||||
/* There are no reasons to failover, so we set the reason why we
|
||||
@ -3239,6 +3274,9 @@ void clusterCron(void) {
|
||||
handshake_timeout = server.cluster_node_timeout;
|
||||
if (handshake_timeout < 1000) handshake_timeout = 1000;
|
||||
|
||||
/* Update myself flags. */
|
||||
clusterUpdateMyselfFlags();
|
||||
|
||||
/* Check if we have disconnected nodes and re-establish the connection.
|
||||
* Also update a few stats while we are here, that can be used to make
|
||||
* better decisions in other part of the code. */
|
||||
@ -3837,7 +3875,8 @@ static struct redisNodeFlags redisNodeFlagsTable[] = {
|
||||
{CLUSTER_NODE_PFAIL, "fail?,"},
|
||||
{CLUSTER_NODE_FAIL, "fail,"},
|
||||
{CLUSTER_NODE_HANDSHAKE, "handshake,"},
|
||||
{CLUSTER_NODE_NOADDR, "noaddr,"}
|
||||
{CLUSTER_NODE_NOADDR, "noaddr,"},
|
||||
{CLUSTER_NODE_NOFAILOVER, "nofailover,"}
|
||||
};
|
||||
|
||||
/* Concatenate the comma separated list of node flags to the given SDS
|
||||
|
Reference in New Issue
Block a user