diskless replication on slave side (don't store rdb to file), plus some other related fixes

The implementation of the diskless replication was currently diskless only on the master side.
The slave side was still storing the received rdb file to the disk before loading it back in and parsing it.

This commit adds two modes to load rdb directly from socket:
1) when-empty
2) using "swapdb"
the third mode of using diskless slave by flushdb is risky and currently not included.

other changes:
--------------
distinguish between aof configuration and state so that we can re-enable aof only when sync eventually
succeeds (and not when exiting from readSyncBulkPayload after a failed attempt)
also a CONFIG GET and INFO during rdb loading would have lied

When loading rdb from the network, don't kill the server on short read (that can be a network error)

Fix rdb check when performed on preamble AOF

tests:
run replication tests for diskless slave too
make replication test a bit more aggressive
Add test for diskless load swapdb
This commit is contained in:
Oran Agra
2019-07-01 15:22:29 +03:00
parent 722446510f
commit 2de544cfcc
17 changed files with 648 additions and 252 deletions

View File

@ -1,12 +1,3 @@
proc start_bg_complex_data {host port db ops} {
set tclsh [info nameofexecutable]
exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops &
}
proc stop_bg_complex_data {handle} {
catch {exec /bin/kill -9 $handle}
}
# Creates a master-slave pair and breaks the link continuously to force
# partial resyncs attempts, all this while flooding the master with
# write queries.
@ -17,7 +8,7 @@ proc stop_bg_complex_data {handle} {
# If reconnect is > 0, the test actually try to break the connection and
# reconnect with the master, otherwise just the initial synchronization is
# checked for consistency.
proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless reconnect} {
proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect} {
start_server {tags {"repl"}} {
start_server {} {
@ -28,8 +19,9 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec
$master config set repl-backlog-size $backlog_size
$master config set repl-backlog-ttl $backlog_ttl
$master config set repl-diskless-sync $diskless
$master config set repl-diskless-sync $mdl
$master config set repl-diskless-sync-delay 1
$slave config set repl-diskless-load $sdl
set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
@ -54,7 +46,7 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec
}
}
test "Test replication partial resync: $descr (diskless: $diskless, reconnect: $reconnect)" {
test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)" {
# Now while the clients are writing data, break the maste-slave
# link multiple times.
if ($reconnect) {
@ -132,23 +124,25 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec
}
}
foreach diskless {no yes} {
test_psync {no reconnection, just sync} 6 1000000 3600 0 {
} $diskless 0
foreach mdl {no yes} {
foreach sdl {disabled swapdb} {
test_psync {no reconnection, just sync} 6 1000000 3600 0 {
} $mdl $sdl 0
test_psync {ok psync} 6 100000000 3600 0 {
test_psync {ok psync} 6 100000000 3600 0 {
assert {[s -1 sync_partial_ok] > 0}
} $diskless 1
} $mdl $sdl 1
test_psync {no backlog} 6 100 3600 0.5 {
test_psync {no backlog} 6 100 3600 0.5 {
assert {[s -1 sync_partial_err] > 0}
} $diskless 1
} $mdl $sdl 1
test_psync {ok after delay} 3 100000000 3600 3 {
test_psync {ok after delay} 3 100000000 3600 3 {
assert {[s -1 sync_partial_ok] > 0}
} $diskless 1
} $mdl $sdl 1
test_psync {backlog expired} 3 100000000 1 3 {
test_psync {backlog expired} 3 100000000 1 3 {
assert {[s -1 sync_partial_err] > 0}
} $diskless 1
} $mdl $sdl 1
}
}