AOF: don't abort on write errors unless fsync is 'always'.

A mechanism similar to the RDB write error handling is used: when we
can't write to the AOF file, write commands are no longer accepted until
we are able to write to it again.

For fsync == always we still abort on errors, since otherwise there is
currently no easy way to avoid replying with success to the user, and
doing so would violate the contract with the user of only acknowledging
data already secured on disk.
This commit is contained in:
antirez
2014-02-12 12:47:10 +01:00
parent 688d32e16b
commit fadbbdd3f4
3 changed files with 91 additions and 24 deletions

View File

@ -1120,9 +1120,13 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
}
/* If we postponed an AOF buffer flush, let's try to do it every time the
* cron function is called. */
if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
/* AOF: we may have postponed buffer flush, or were not able to
* write our buffer because of write(2) error. Try again here. */
if (server.aof_flush_postponed_start ||
server.aof_last_write_status == REDIS_ERR)
{
flushAppendOnlyFile(0);
}
/* Close clients that need to be closed asynchronous */
freeClientsInAsyncFreeQueue();
@ -1593,6 +1597,8 @@ void initServer() {
server.unixtime = time(NULL);
server.mstime = mstime();
server.lastbgsave_status = REDIS_OK;
server.aof_last_write_status = REDIS_OK;
server.aof_last_write_errno = 0;
server.repl_good_slaves_count = 0;
/* Create the serverCron() time event, that's our main way to process
@ -1928,15 +1934,22 @@ int processCommand(redisClient *c) {
/* Don't accept write commands if there are problems persisting on disk
* and if this is a master instance. */
if (server.stop_writes_on_bgsave_err &&
server.saveparamslen > 0
&& server.lastbgsave_status == REDIS_ERR &&
if (((server.stop_writes_on_bgsave_err &&
server.saveparamslen > 0 &&
server.lastbgsave_status == REDIS_ERR) ||
server.aof_last_write_status == REDIS_ERR) &&
server.masterhost == NULL &&
(c->cmd->flags & REDIS_CMD_WRITE ||
c->cmd->proc == pingCommand))
{
flagTransaction(c);
addReply(c, shared.bgsaveerr);
if (server.aof_last_write_status == REDIS_OK)
addReply(c, shared.bgsaveerr);
else
addReplySds(c,
sdscatprintf(sdsempty(),
"-MISCONF Errors writing to the AOF file: %s\r\n",
strerror(server.aof_last_write_errno)));
return REDIS_OK;
}
@ -2315,7 +2328,8 @@ sds genRedisInfoString(char *section) {
"aof_rewrite_scheduled:%d\r\n"
"aof_last_rewrite_time_sec:%jd\r\n"
"aof_current_rewrite_time_sec:%jd\r\n"
"aof_last_bgrewrite_status:%s\r\n",
"aof_last_bgrewrite_status:%s\r\n"
"aof_last_write_status:%s\r\n",
server.loading,
server.dirty,
server.rdb_child_pid != -1,
@ -2330,7 +2344,8 @@ sds genRedisInfoString(char *section) {
(intmax_t)server.aof_rewrite_time_last,
(intmax_t)((server.aof_child_pid == -1) ?
-1 : time(NULL)-server.aof_rewrite_time_start),
(server.aof_lastbgrewrite_status == REDIS_OK) ? "ok" : "err");
(server.aof_lastbgrewrite_status == REDIS_OK) ? "ok" : "err",
(server.aof_last_write_status == REDIS_OK) ? "ok" : "err");
if (server.aof_state != REDIS_AOF_OFF) {
info = sdscatprintf(info,