From 1d80acae541f4287ea7416d4d526d65086bace44 Mon Sep 17 00:00:00 2001 From: charsyam Date: Thu, 31 Jan 2013 12:09:16 +0900 Subject: [PATCH] Turn off TCP_NODELAY on the slave socket after SYNC. Further details from @antirez: It was reported by @StopForumSpam on Twitter that the Redis replication link was strangely using multiple TCP packets for multiple commands. This wastes a lot of bandwidth and is due to the TCP_NODELAY option we enable on the socket after accepting a new connection. However, the master -> slave channel is a one-way channel since Redis replication is asynchronous, so there is no point in trying to reduce the latency; we should aim to reduce the bandwidth instead. For this reason this commit introduces the ability to turn off TCP_NODELAY (that is, to re-enable the Nagle algorithm) on the socket after a successful SYNC. This feature is off by default because the delay can be up to 40 milliseconds with normally configured Linux kernels. --- src/anet.c | 14 ++++++++++++-- src/anet.h | 1 + src/config.c | 7 +++++++ src/redis.c | 1 + src/redis.h | 1 + src/replication.c | 8 ++++++++ 6 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/anet.c b/src/anet.c index 4da3e28d..d002cb31 100644 --- a/src/anet.c +++ b/src/anet.c @@ -75,9 +75,8 @@ int anetNonBlock(char *err, int fd) return ANET_OK; } -int anetTcpNoDelay(char *err, int fd) +static int _anetTcpNoDelay(char *err, int fd, int yes) { - int yes = 1; if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)) == -1) { anetSetError(err, "setsockopt TCP_NODELAY: %s", strerror(errno)); @@ -86,6 +85,17 @@ int anetTcpNoDelay(char *err, int fd) return ANET_OK; } +int anetTcpNoDelay(char *err, int fd) +{ + return _anetTcpNoDelay(err, fd, 1); +} + +int anetTcpNoDelayOff(char *err, int fd) +{ + return _anetTcpNoDelay(err, fd, 0); +} + + int anetSetSendBuffer(char *err, int fd, int buffsize) { if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buffsize, sizeof(buffsize)) == -1) diff --git a/src/anet.h b/src/anet.h index 062b22c5..56ae5057 100644 --- 
a/src/anet.h +++ b/src/anet.h @@ -52,6 +52,7 @@ int anetUnixAccept(char *err, int serversock); int anetWrite(int fd, char *buf, int count); int anetNonBlock(char *err, int fd); int anetTcpNoDelay(char *err, int fd); +int anetTcpNoDelayOff(char *err, int fd); int anetTcpKeepAlive(char *err, int fd); int anetPeerToString(int fd, char *ip, int *port); diff --git a/src/config.c b/src/config.c index 4ab19c15..365e5047 100644 --- a/src/config.c +++ b/src/config.c @@ -382,6 +382,8 @@ void loadServerConfigFromString(char *config) { if ((server.stop_writes_on_bgsave_err = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } + } else if (!strcasecmp(argv[0],"slave-tcp-nodelay-off") && argc == 2) { + server.slave_tcp_nodelay_off = atoi(argv[1]); } else if (!strcasecmp(argv[0],"slave-priority") && argc == 2) { server.slave_priority = atoi(argv[1]); } else if (!strcasecmp(argv[0],"notify-keyspace-events") && argc == 2) { @@ -715,6 +717,10 @@ void configSetCommand(redisClient *c) { if (flags == -1) goto badfmt; server.notify_keyspace_events = flags; + } else if (!strcasecmp(c->argv[2]->ptr,"slave-tcp-nodelay-off")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR ) goto badfmt; + + server.slave_tcp_nodelay_off = ll; } else if (!strcasecmp(c->argv[2]->ptr,"slave-priority")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt; @@ -808,6 +814,7 @@ void configGetCommand(redisClient *c) { config_get_numerical_field("repl-timeout",server.repl_timeout); config_get_numerical_field("maxclients",server.maxclients); config_get_numerical_field("watchdog-period",server.watchdog_period); + config_get_numerical_field("slave-tcp-nodelay-off",server.slave_tcp_nodelay_off); config_get_numerical_field("slave-priority",server.slave_priority); config_get_numerical_field("hz",server.hz); diff --git a/src/redis.c b/src/redis.c index 6786eedd..c1bdd519 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1164,6 +1164,7 @@ void initServerConfig() { 
server.repl_serve_stale_data = 1; server.repl_slave_ro = 1; server.repl_down_since = time(NULL); + server.slave_tcp_nodelay_off = 1; server.slave_priority = REDIS_DEFAULT_SLAVE_PRIORITY; /* Client output buffer limits */ diff --git a/src/redis.h b/src/redis.h index 635232f9..dad5fc5a 100644 --- a/src/redis.h +++ b/src/redis.h @@ -633,6 +633,7 @@ struct redisServer { int repl_serve_stale_data; /* Serve stale data when link is down? */ int repl_slave_ro; /* Slave is read only? */ time_t repl_down_since; /* Unix time at which link with master went down */ + int slave_tcp_nodelay_off; /* turn off slave's tcp nodelay */ int slave_priority; /* Reported in INFO and used by Sentinel. */ /* Limits */ unsigned int maxclients; /* Max number of simultaneous clients */ diff --git a/src/replication.c b/src/replication.c index 2f0cba70..fffac0e1 100644 --- a/src/replication.c +++ b/src/replication.c @@ -118,6 +118,14 @@ void syncCommand(redisClient *c) { /* ignore SYNC if already slave or in monitor mode */ if (c->flags & REDIS_SLAVE) return; + if (server.slave_tcp_nodelay_off) { + redisLog(REDIS_NOTICE, "Turning off slave's :%d TCP NODELAY SETTING", c->fd); + char err[1024]; + if (anetTcpNoDelayOff(err, c->fd) == ANET_ERR) + redisLog(REDIS_WARNING, + "Can't turn off %d 's tcp nodelay setting: %s", c->fd, err); + } + /* Refuse SYNC requests if we are a slave but the link with our master * is not ok... */ if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED) {