Sentinel: always send CONFIG REWRITE when changing instance role.

This change makes Sentinel less fragile about a number of failure modes.

This commit also fixes a different bug as a side effect: the SLAVEOF command
was sent in multiple places without incrementing the pending commands count.
This commit is contained in:
antirez 2013-11-05 17:23:11 +01:00
parent f7f97bf730
commit 97810c45e8

View File

@ -1520,8 +1520,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
(mstime() - ri->master->info_refresh) < SENTINEL_INFO_PERIOD*2) (mstime() - ri->master->info_refresh) < SENTINEL_INFO_PERIOD*2)
{ {
int retval; int retval;
retval = redisAsyncCommand(ri->cc, retval = sentinelSendSlaveOf(ri,
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %d",
ri->master->addr->ip, ri->master->addr->ip,
ri->master->addr->port); ri->master->addr->port);
if (retval == REDIS_OK) if (retval == REDIS_OK)
@ -2523,6 +2522,39 @@ char *sentinelGetObjectiveLeader(sentinelRedisInstance *master) {
return winner; return winner;
} }
/* Send SLAVEOF to the specified instance, always followed by a
 * CONFIG REWRITE command in order to store the new configuration on disk
 * when possible (that is, if the Redis instance is recent enough to support
 * config rewriting, and if the server was started with a configuration file).
 *
 * If 'host' is NULL the function sends "SLAVEOF NO ONE" and the value of
 * 'port' is not sent.
 *
 * The function returns REDIS_OK if the SLAVEOF command was accepted for
 * (later) delivery, otherwise REDIS_ERR. A failure to queue CONFIG REWRITE
 * is not reported to the caller: REDIS_OK is returned anyway, since the
 * rewrite is only a best effort step.
 *
 * ri->pending_commands is incremented once for every command actually
 * queued, so callers must not increment it themselves. The command replies
 * are just discarded. */
int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
    char portstr[32];
    int retval;

    ll2string(portstr,sizeof(portstr),port);

    if (host == NULL) {
        /* Turn the two arguments into "NO" "ONE". */
        host = "NO";
        memcpy(portstr,"ONE",4); /* 4 bytes: "ONE" plus the nul term. */
    }

    retval = redisAsyncCommand(ri->cc,
        sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", host, portstr);
    if (retval == REDIS_ERR) return retval;

    ri->pending_commands++;

    /* CONFIG REWRITE is queued only after SLAVEOF was accepted. We count it
     * as pending only if it was queued, and ignore the error otherwise. */
    if (redisAsyncCommand(ri->cc,
        sentinelDiscardReplyCallback, NULL, "CONFIG REWRITE") == REDIS_OK)
    {
        ri->pending_commands++;
    }
    return REDIS_OK;
}
/* Setup the master state to start a failover as a leader. /* Setup the master state to start a failover as a leader.
* *
* State can be either: * State can be either:
@ -2752,10 +2784,8 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
* We actually register a generic callback for this command as we don't * We actually register a generic callback for this command as we don't
* really care about the reply. We check if it worked indirectly observing * really care about the reply. We check if it worked indirectly observing
* if INFO returns a different role (master instead of slave). */ * if INFO returns a different role (master instead of slave). */
retval = redisAsyncCommand(ri->promoted_slave->cc, retval = sentinelSendSlaveOf(ri->promoted_slave,NULL,0);
sentinelDiscardReplyCallback, NULL, "SLAVEOF NO ONE");
if (retval != REDIS_OK) return; if (retval != REDIS_OK) return;
ri->promoted_slave->pending_commands++;
sentinelEvent(REDIS_NOTICE, "+failover-state-wait-promotion", sentinelEvent(REDIS_NOTICE, "+failover-state-wait-promotion",
ri->promoted_slave,"%@"); ri->promoted_slave,"%@");
ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION; ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION;
@ -2825,10 +2855,6 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
if (timeout && (master->flags & SRI_I_AM_THE_LEADER)) { if (timeout && (master->flags & SRI_I_AM_THE_LEADER)) {
dictIterator *di; dictIterator *di;
dictEntry *de; dictEntry *de;
char master_port[32];
ll2string(master_port,sizeof(master_port),
master->promoted_slave->addr->port);
di = dictGetIterator(master->slaves); di = dictGetIterator(master->slaves);
while((de = dictNext(di)) != NULL) { while((de = dictNext(di)) != NULL) {
@ -2838,10 +2864,9 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
if (slave->flags & if (slave->flags &
(SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue; (SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue;
retval = redisAsyncCommand(slave->cc, retval = sentinelSendSlaveOf(slave,
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
master->promoted_slave->addr->ip, master->promoted_slave->addr->ip,
master_port); master->promoted_slave->addr->port);
if (retval == REDIS_OK) { if (retval == REDIS_OK) {
sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent-be",slave,"%@"); sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent-be",slave,"%@");
slave->flags |= SRI_RECONF_SENT; slave->flags |= SRI_RECONF_SENT;
@ -2894,15 +2919,11 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
continue; continue;
/* Send SLAVEOF <new master>. */ /* Send SLAVEOF <new master>. */
ll2string(master_port,sizeof(master_port), retval = sentinelSendSlaveOf(slave,
master->promoted_slave->addr->port);
retval = redisAsyncCommand(slave->cc,
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
master->promoted_slave->addr->ip, master->promoted_slave->addr->ip,
master_port); master->promoted_slave->addr->port);
if (retval == REDIS_OK) { if (retval == REDIS_OK) {
slave->flags |= SRI_RECONF_SENT; slave->flags |= SRI_RECONF_SENT;
slave->pending_commands++;
slave->slave_reconf_sent_time = mstime(); slave->slave_reconf_sent_time = mstime();
sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent",slave,"%@"); sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent",slave,"%@");
in_progress++; in_progress++;
@ -2984,13 +3005,11 @@ void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
* back to the master as well, sending a best effort SLAVEOF command. * back to the master as well, sending a best effort SLAVEOF command.
*/ */
void sentinelAbortFailover(sentinelRedisInstance *ri) { void sentinelAbortFailover(sentinelRedisInstance *ri) {
char master_port[32];
dictIterator *di; dictIterator *di;
dictEntry *de; dictEntry *de;
int sentinel_role; int sentinel_role;
redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS); redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
ll2string(master_port,sizeof(master_port),ri->addr->port);
/* Clear failover related flags from slaves. /* Clear failover related flags from slaves.
* Also if we are the leader make sure to send SLAVEOF commands to all the * Also if we are the leader make sure to send SLAVEOF commands to all the
@ -3006,10 +3025,7 @@ void sentinelAbortFailover(sentinelRedisInstance *ri) {
{ {
int retval; int retval;
retval = redisAsyncCommand(slave->cc, retval = sentinelSendSlaveOf(slave,ri->addr->ip,ri->addr->port);
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
ri->addr->ip,
master_port);
if (retval == REDIS_OK) if (retval == REDIS_OK)
sentinelEvent(REDIS_NOTICE,"-slave-reconf-undo",slave,"%@"); sentinelEvent(REDIS_NOTICE,"-slave-reconf-undo",slave,"%@");
} }