From 783b210db4b79cdd1313a8196ea58fa5ad999fd1 Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 11 May 2022 16:21:16 +0800 Subject: [PATCH] FLUSHDB and FLUSHALL add call forceCommandPropagation / FLUSHALL reset dirty counter to 0 if we enable save (#10691) ## FLUSHALL We used to restore the dirty counter after `rdbSave` zeroed it if we enable save. Otherwise FLUSHALL will not be replicated nor put into the AOF. And then we do increment it again below. Without that extra dirty++, when db was already empty, FLUSHALL will not be replicated nor put into the AOF. We now gonna replace all that dirty counter magic with a call to forceCommandPropagation (REPL and AOF), instead of all the messing around with the dirty counter. Added tests to cover three part (dirty counter, REPL, AOF). One benefit other than cleaner code is that the `rdb_changes_since_last_save` is correct in this case. ## FLUSHDB FLUSHDB was not replicated nor put into the AOF when db was already empty. Unlike DEL on a non-existing key, FLUSHDB always does something, and that's to call the module hook. So basically FLUSHDB is never a NOP, and thus it should always be propagated. Not doing that, could mean that if a module does something in that hook, and wants to avoid issues of that hook being missing on the replica if the db is empty, it'll need to do complicated things. So now FLUSHDB add call forceCommandPropagation, we will always propagate FLUSHDB. Always propagating FLUSHDB seems like a safe approach that shouldn't have any drawbacks (other than looking odd) This was mentioned in #8972 ## Test section: We actually found it while solving a race condition in the BGSAVE test (other.tcl). It was found in extra_ci Daily Arm64 (test-libc-malloc). ``` [exception]: Executing test client: ERR Background save already in progress. ERR Background save already in progress ``` It look like `r flushdb` trigger (schedule) a bgsave right after `waitForBgsave r` and before `r save`. Changing flushdb to flushall, FLUSHALL will do a foreground save and then set the dirty counter to 0. --- src/db.c | 18 +++++++----- tests/integration/aof.tcl | 46 +++++++++++++++++++++++++++++++ tests/integration/rdb.tcl | 2 +- tests/integration/replication.tcl | 42 +++++++++++++++++++++++++--- tests/unit/other.tcl | 20 ++++++++++++-- 5 files changed, 114 insertions(+), 14 deletions(-) diff --git a/src/db.c b/src/db.c index 5b46b70ef..eaf45e9e3 100644 --- a/src/db.c +++ b/src/db.c @@ -602,18 +602,11 @@ void flushAllDataAndResetRDB(int flags) { server.dirty += emptyData(-1,flags,NULL); if (server.child_type == CHILD_TYPE_RDB) killRDBChild(); if (server.saveparamslen > 0) { - /* Normally rdbSave() will reset dirty, but we don't want this here - * as otherwise FLUSHALL will not be replicated nor put into the AOF. */ - int saved_dirty = server.dirty; rdbSaveInfo rsi, *rsiptr; rsiptr = rdbPopulateSaveInfo(&rsi); rdbSave(SLAVE_REQ_NONE,server.rdb_filename,rsiptr); - server.dirty = saved_dirty; } - /* Without that extra dirty++, when db was already empty, FLUSHALL will - * not be replicated nor put into the AOF. */ - server.dirty++; #if defined(USE_JEMALLOC) /* jemalloc 5 doesn't release pages back to the OS when there's no traffic. * for large databases, flushdb blocks for long anyway, so a bit more won't @@ -632,7 +625,13 @@ void flushdbCommand(client *c) { if (getFlushCommandFlags(c,&flags) == C_ERR) return; /* flushdb should not flush the functions */ server.dirty += emptyData(c->db->id,flags | EMPTYDB_NOFUNCTIONS,NULL); + + /* Without the forceCommandPropagation, when DB was already empty, + * FLUSHDB will not be replicated nor put into the AOF. */ + forceCommandPropagation(c, PROPAGATE_REPL | PROPAGATE_AOF); + addReply(c,shared.ok); + #if defined(USE_JEMALLOC) /* jemalloc 5 doesn't release pages back to the OS when there's no traffic. * for large databases, flushdb blocks for long anyway, so a bit more won't @@ -650,6 +649,11 @@ void flushallCommand(client *c) { if (getFlushCommandFlags(c,&flags) == C_ERR) return; /* flushall should not flush the functions */ flushAllDataAndResetRDB(flags | EMPTYDB_NOFUNCTIONS); + + /* Without the forceCommandPropagation, when DBs were already empty, + * FLUSHALL will not be replicated nor put into the AOF. */ + forceCommandPropagation(c, PROPAGATE_REPL | PROPAGATE_AOF); + addReply(c,shared.ok); } diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 3d8c08c51..1f73fc341 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -632,4 +632,50 @@ tags {"aof external:skip"} { } result assert_match "*Failed to truncate AOF*to timestamp*because it is not the last file*" $result } + + start_server {overrides {appendonly yes appendfsync always}} { + test {FLUSHDB / FLUSHALL should persist in AOF} { + set aof [get_last_incr_aof_path r] + + r set key value + r flushdb + r set key value2 + r flushdb + + # DB is empty + r flushdb + r flushdb + r flushdb + + r set key value + r flushall + r set key value2 + r flushall + + # DBs are empty. + r flushall + r flushall + r flushall + + # Assert that each FLUSHDB command is persisted even the DB is empty. + # Assert that each FLUSHALL command is persisted even the DBs are empty. + assert_aof_content $aof { + {select *} + {set key value} + {flushdb} + {set key value2} + {flushdb} + {flushdb} + {flushdb} + {flushdb} + {set key value} + {flushall} + {set key value2} + {flushall} + {flushall} + {flushall} + {flushall} + } + } + } } diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl index 652431ee7..326a17350 100644 --- a/tests/integration/rdb.tcl +++ b/tests/integration/rdb.tcl @@ -278,7 +278,7 @@ start_server {overrides {save ""}} { set current_save_keys_total [s current_save_keys_total] if {$::verbose} { - puts "Keys before bgsave start: current_save_keys_total" + puts "Keys before bgsave start: $current_save_keys_total" } # on each iteration, we will write some key to the server to trigger copy-on-write, and diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 44915be1b..d60c91918 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -225,11 +225,45 @@ start_server {tags {"repl external:skip"}} { } } - test {FLUSHALL should replicate} { + test {FLUSHDB / FLUSHALL should replicate} { + set repl [attach_to_replication_stream] + + r -1 set key value + r -1 flushdb + + r -1 set key value2 r -1 flushall - if {$::valgrind} {after 2000} - list [r -1 dbsize] [r 0 dbsize] - } {0 0} + + wait_for_ofs_sync [srv 0 client] [srv -1 client] + assert_equal [r -1 dbsize] 0 + assert_equal [r 0 dbsize] 0 + + # DB is empty. + r -1 flushdb + r -1 flushdb + r -1 flushdb + + # DBs are empty. + r -1 flushall + r -1 flushall + r -1 flushall + + # Assert that each FLUSHDB command is replicated even the DB is empty. + # Assert that each FLUSHALL command is replicated even the DBs are empty. + assert_replication_stream $repl { + {set key value} + {flushdb} + {set key value2} + {flushall} + {flushdb} + {flushdb} + {flushdb} + {flushall} + {flushall} + {flushall} + } + close_replication_stream $repl + } test {ROLE in master reports master with a slave} { set res [r -1 role] diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index bd220a145..2ae09b5b7 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -38,9 +38,25 @@ start_server {tags {"other"}} { } } + start_server {overrides {save ""} tags {external:skip}} { + test {FLUSHALL should not reset the dirty counter if we disable save} { + r set key value + r flushall + assert_morethan [s rdb_changes_since_last_save] 0 + } + + test {FLUSHALL should reset the dirty counter to 0 if we enable save} { + r config set save "3600 1 300 100 60 10000" + r set key value + r flushall + assert_equal [s rdb_changes_since_last_save] 0 + } + } + test {BGSAVE} { - r flushdb - waitForBgsave r + # Use FLUSHALL instead of FLUSHDB, FLUSHALL do a foreground save + # and reset the dirty counter to 0, so we won't trigger an unexpected bgsave. + r flushall r save r set x 10 r bgsave