From 308940aa2c22c60e62d4ae906ef3092079b92b14 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 27 May 2013 11:17:17 +0200 Subject: [PATCH] Close connections with timed-out slaves. Now masters, using the time at which the last REPLCONF ACK was received, are able to explicitly disconnect slaves that are no longer responding. Previously the only way to detect such slaves was to notice a very long output buffer, which was highly suboptimal. --- src/replication.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/replication.c b/src/replication.c index 348428c5..a028359a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -424,6 +424,7 @@ int masterTryPartialResynchronization(redisClient *c) { * 3) Send the backlog data (from the offset to the end) to the slave. */ c->flags |= REDIS_SLAVE; c->replstate = REDIS_REPL_ONLINE; + c->repl_ack_time = server.unixtime; listAddNodeTail(server.slaves,c); /* We can't use the connection buffers since they are used to accumulate * new commands at this stage. But we are sure the socket send buffer is @@ -655,6 +656,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) { slave->repldbfd = -1; aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE); slave->replstate = REDIS_REPL_ONLINE; + slave->repl_ack_time = server.unixtime; if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendReplyToClient, slave) == AE_ERR) { freeClient(slave); @@ -1457,6 +1459,31 @@ void replicationCron(void) { } } + /* Disconnect timedout slaves. 
*/ + if (listLength(server.slaves)) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + redisClient *slave = ln->value; + + if (slave->replstate != REDIS_REPL_ONLINE) continue; + if ((server.unixtime - slave->repl_ack_time) > server.repl_timeout) + { + char ip[32]; + int port; + + if (anetPeerToString(slave->fd,ip,&port) != -1) { + redisLog(REDIS_WARNING, + "Disconnecting timedout slave: %s:%d", + ip, slave->slave_listening_port); + } + freeClient(slave); + } + } + } + /* If we have no attached slaves and there is a replication backlog * using memory, free it after some (configured) time. */ if (listLength(server.slaves) == 0 && server.repl_backlog_time_limit &&