diff --git a/src/aof.c b/src/aof.c index 89443e4bdb..f3f6d782fc 100644 --- a/src/aof.c +++ b/src/aof.c @@ -30,6 +30,13 @@ aofManifest *aofLoadManifestFromFile(sds am_filepath); void aofManifestFreeAndUpdate(aofManifest *am); void aof_background_fsync_and_close(int fd); +/* When we call 'startAppendOnly', we will create a temp INCR AOF, and rename + * it to the real INCR AOF name when the AOFRW is done, so if we want to know + * the accurate start offset of the INCR AOF, we need to record it when we + * create the temp INCR AOF. This variable is used to record the start offset, + * and set the start offset of the real INCR AOF when the AOFRW is done. */ +static long long tempIncAofStartReplOffset = 0; + /* ---------------------------------------------------------------------------- * AOF Manifest file implementation. * @@ -73,10 +80,15 @@ void aof_background_fsync_and_close(int fd); #define AOF_MANIFEST_KEY_FILE_NAME "file" #define AOF_MANIFEST_KEY_FILE_SEQ "seq" #define AOF_MANIFEST_KEY_FILE_TYPE "type" +#define AOF_MANIFEST_KEY_FILE_STARTOFFSET "startoffset" +#define AOF_MANIFEST_KEY_FILE_ENDOFFSET "endoffset" /* Create an empty aofInfo. */ aofInfo *aofInfoCreate(void) { - return zcalloc(sizeof(aofInfo)); + aofInfo *ai = zcalloc(sizeof(aofInfo)); + ai->start_offset = -1; + ai->end_offset = -1; + return ai; } /* Free the aofInfo structure (pointed to by ai) and its embedded file_name. */ @@ -93,6 +105,8 @@ aofInfo *aofInfoDup(aofInfo *orig) { ai->file_name = sdsdup(orig->file_name); ai->file_seq = orig->file_seq; ai->file_type = orig->file_type; + ai->start_offset = orig->start_offset; + ai->end_offset = orig->end_offset; return ai; } @@ -105,10 +119,19 @@ sds aofInfoFormat(sds buf, aofInfo *ai) { if (sdsneedsrepr(ai->file_name)) filename_repr = sdscatrepr(sdsempty(), ai->file_name, sdslen(ai->file_name)); - sds ret = sdscatprintf(buf, "%s %s %s %lld %s %c\n", + sds ret = sdscatprintf(buf, "%s %s %s %lld %s %c", AOF_MANIFEST_KEY_FILE_NAME, filename_repr ? 
filename_repr : ai->file_name, AOF_MANIFEST_KEY_FILE_SEQ, ai->file_seq, AOF_MANIFEST_KEY_FILE_TYPE, ai->file_type); + + if (ai->start_offset != -1) { + ret = sdscatprintf(ret, " %s %lld", AOF_MANIFEST_KEY_FILE_STARTOFFSET, ai->start_offset); + if (ai->end_offset != -1) { + ret = sdscatprintf(ret, " %s %lld", AOF_MANIFEST_KEY_FILE_ENDOFFSET, ai->end_offset); + } + } + + ret = sdscatlen(ret, "\n", 1); sdsfree(filename_repr); return ret; @@ -304,6 +327,10 @@ aofManifest *aofLoadManifestFromFile(sds am_filepath) { ai->file_seq = atoll(argv[i+1]); } else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_TYPE)) { ai->file_type = (argv[i+1])[0]; + } else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_STARTOFFSET)) { + ai->start_offset = atoll(argv[i+1]); + } else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_ENDOFFSET)) { + ai->end_offset = atoll(argv[i+1]); } /* else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_OTHER)) {} */ } @@ -433,12 +460,13 @@ sds getNewBaseFileNameAndMarkPreAsHistory(aofManifest *am) { * for example: * appendonly.aof.1.incr.aof */ -sds getNewIncrAofName(aofManifest *am) { +sds getNewIncrAofName(aofManifest *am, long long start_reploff) { aofInfo *ai = aofInfoCreate(); ai->file_type = AOF_FILE_TYPE_INCR; ai->file_name = sdscatprintf(sdsempty(), "%s.%lld%s%s", server.aof_filename, ++am->curr_incr_file_seq, INCR_FILE_SUFFIX, AOF_FORMAT_SUFFIX); ai->file_seq = am->curr_incr_file_seq; + ai->start_offset = start_reploff; listAddNodeTail(am->incr_aof_list, ai); am->dirty = 1; return ai->file_name; @@ -456,7 +484,7 @@ sds getLastIncrAofName(aofManifest *am) { /* If 'incr_aof_list' is empty, just create a new one. */ if (!listLength(am->incr_aof_list)) { - return getNewIncrAofName(am); + return getNewIncrAofName(am, server.master_repl_offset); } /* Or return the last one. */ @@ -781,10 +809,11 @@ int openNewIncrAofForAppend(void) { if (server.aof_state == AOF_WAIT_REWRITE) { /* Use a temporary INCR AOF file to accumulate data during AOF_WAIT_REWRITE. 
*/ new_aof_name = getTempIncrAofName(); + tempIncAofStartReplOffset = server.master_repl_offset; } else { /* Dup a temp aof_manifest to modify. */ temp_am = aofManifestDup(server.aof_manifest); - new_aof_name = sdsdup(getNewIncrAofName(temp_am)); + new_aof_name = sdsdup(getNewIncrAofName(temp_am, server.master_repl_offset)); } sds new_aof_filepath = makePath(server.aof_dirname, new_aof_name); newfd = open(new_aof_filepath, O_WRONLY|O_TRUNC|O_CREAT, 0644); @@ -833,6 +862,50 @@ cleanup: return C_ERR; } +/* When we gracefully close the AOF file, we have the chance to persist the + * end replication offset of the current INCR AOF. */ +void updateCurIncrAofEndOffset(void) { + if (server.aof_state != AOF_ON) return; + serverAssert(server.aof_manifest != NULL); + + if (listLength(server.aof_manifest->incr_aof_list) == 0) return; + aofInfo *ai = listNodeValue(listLast(server.aof_manifest->incr_aof_list)); + ai->end_offset = server.master_repl_offset; + server.aof_manifest->dirty = 1; + /* It doesn't matter if the persistence fails, since this information is not + * critical: we can get an approximate value by start offset plus file size. */ + persistAofManifest(server.aof_manifest); +} + +/* After loading AOF data, we need to update the `server.master_repl_offset` + * based on the information of the last INCR AOF, to avoid the rollback of + * the start offset of the new INCR AOF. */ +void updateReplOffsetAndResetEndOffset(void) { + if (server.aof_state != AOF_ON) return; + serverAssert(server.aof_manifest != NULL); + + /* If the INCR file has an end offset, we directly use it, and then clear + * it so that the next time we load the manifest file we don't reuse the + * same offset while the real offset may have advanced. 
*/ + if (listLength(server.aof_manifest->incr_aof_list) == 0) return; + aofInfo *ai = listNodeValue(listLast(server.aof_manifest->incr_aof_list)); + if (ai->end_offset != -1) { + server.master_repl_offset = ai->end_offset; + ai->end_offset = -1; + server.aof_manifest->dirty = 1; + /* We must update the end offset of INCR file correctly, otherwise we + * may keep wrong information in the manifest file, since we continue + * to append data to the same INCR file. */ + if (persistAofManifest(server.aof_manifest) != AOF_OK) + exit(1); + } else { + /* If the INCR file doesn't have an end offset, we need to calculate + * the replication offset by the start offset plus the file size. */ + server.master_repl_offset = (ai->start_offset == -1 ? 0 : ai->start_offset) + + getAppendOnlyFileSize(ai->file_name, NULL); + } +} + /* Whether to limit the execution of Background AOF rewrite. * * At present, if AOFRW fails, redis will automatically retry. If it continues @@ -938,6 +1011,7 @@ void stopAppendOnly(void) { server.aof_last_fsync = server.mstime; } close(server.aof_fd); + updateCurIncrAofEndOffset(); server.aof_fd = -1; server.aof_selected_db = -1; @@ -2664,7 +2738,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { sds temp_incr_aof_name = getTempIncrAofName(); sds temp_incr_filepath = makePath(server.aof_dirname, temp_incr_aof_name); /* Get next new incr aof name. */ - sds new_incr_filename = getNewIncrAofName(temp_am); + sds new_incr_filename = getNewIncrAofName(temp_am, tempIncAofStartReplOffset); new_incr_filepath = makePath(server.aof_dirname, new_incr_filename); latencyStartMonitor(latency); if (rename(temp_incr_filepath, new_incr_filepath) == -1) { diff --git a/src/server.c b/src/server.c index b76b3526cc..0e056c8be4 100644 --- a/src/server.c +++ b/src/server.c @@ -4600,6 +4600,9 @@ int finishShutdown(void) { } } + /* Update the end offset of current INCR AOF if possible. */ + updateCurIncrAofEndOffset(); + /* Free the AOF manifest. 
*/ if (server.aof_manifest) aofManifestFree(server.aof_manifest); @@ -6864,6 +6867,7 @@ void loadDataFromDisk(void) { exit(1); if (ret != AOF_NOT_EXIST) serverLog(LL_NOTICE, "DB loaded from append only file: %.3f seconds", (float)(ustime()-start)/1000000); + updateReplOffsetAndResetEndOffset(); } else { rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; int rsi_is_valid = 0; diff --git a/src/server.h b/src/server.h index e0634d77a4..e04035c28e 100644 --- a/src/server.h +++ b/src/server.h @@ -1620,6 +1620,8 @@ typedef struct { sds file_name; /* file name */ long long file_seq; /* file sequence */ aof_file_type file_type; /* file type */ + long long start_offset; /* the start replication offset of the file */ + long long end_offset; /* the end replication offset of the file */ } aofInfo; typedef struct { @@ -3059,6 +3061,8 @@ void aofOpenIfNeededOnServerStart(void); void aofManifestFree(aofManifest *am); int aofDelHistoryFiles(void); int aofRewriteLimited(void); +void updateCurIncrAofEndOffset(void); +void updateReplOffsetAndResetEndOffset(void); /* Child info */ void openChildInfoPipe(void); diff --git a/tests/integration/aof-multi-part.tcl b/tests/integration/aof-multi-part.tcl index bdd0382339..5a0025070a 100644 --- a/tests/integration/aof-multi-part.tcl +++ b/tests/integration/aof-multi-part.tcl @@ -1329,4 +1329,210 @@ tags {"external:skip"} { } } } + + # Test Part 3 + # + # Test if INCR AOF offset information is as expected + test {Multi Part AOF writes start offset in the manifest} { + set aof_dirpath "$server_path/$aof_dirname" + set aof_manifest_file "$server_path/$aof_dirname/${aof_basename}$::manifest_suffix" + + start_server_aof [list dir $server_path] { + set client [redis [srv host] [srv port] 0 $::tls] + wait_done_loading $client + + # The manifest file has startoffset now + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.1.base.rdb seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i startoffset 0} + } + } + + clean_aof_persistence 
$aof_dirpath + } + + test {Multi Part AOF won't add the offset of incr AOF from old version} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k2 v2] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive [srv pid]] + set client [redis [srv host] [srv port] 0 $::tls] + wait_done_loading $client + + assert_equal v1 [$client get k1] + assert_equal v2 [$client get k2] + + $client set k3 v3 + catch {$client shutdown} + + # Should not add offsets to the manifest since we don't know the right + # starting replication offset of these files. + set fp [open $aof_manifest_file r] + set content [read $fp] + close $fp + assert ![regexp {startoffset} $content] + + # The manifest file still has the information from the old version + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.1.base.aof seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can update master_repl_offset with only startoffset info} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k2 v2] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i startoffset 100\n" + } + + start_server [list overrides [list dir $server_path appendonly yes ]] { + wait_done_loading r + r select 0 + assert_equal v1 [r get k1] + assert_equal v2 [r get k2] + + # After loading AOF, redis will update the replication offset based on + # the 
information of the last INCR AOF, to avoid the rollback of the + # start offset of new INCR AOF. If the INCR file doesn't have an end offset + # info, redis will calculate the replication offset by the start offset + # plus the file size. + set file_size [file size $aof_incr1_file] + set offset [expr $file_size + 100] + assert_equal $offset [s master_repl_offset] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can update master_repl_offset with endoffset info} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k2 v2] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i startoffset 100 endoffset 200\n" + } + + start_server [list overrides [list dir $server_path appendonly yes ]] { + wait_done_loading r + r select 0 + assert_equal v1 [r get k1] + assert_equal v2 [r get k2] + + # If the INCR file has an end offset, redis directly uses it as replication offset + assert_equal 200 [s master_repl_offset] + + # We should reset endoffset in manifest file + set fp [open $aof_manifest_file r] + set content [read $fp] + close $fp + assert ![regexp {endoffset} $content] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF will add the end offset if we close gracefully the AOF} { + start_server_aof [list dir $server_path] { + set client [redis [srv host] [srv port] 0 $::tls] + wait_done_loading $client + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.1.base.rdb seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i startoffset 0} + } + + $client set k1 v1 + $client set k2 v2 + # Close AOF gracefully when stopping appendonly, we should add endoffset + # in the manifest file, 'endoffset' should be 2 since writing 2 commands + r config set 
appendonly no + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.1.base.rdb seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i startoffset 0 endoffset 2} + } + r config set appendonly yes + waitForBgrewriteaof $client + + $client set k3 v3 + # Close AOF gracefully when shutting down server, we should add endoffset + # in the manifest file, 'endoffset' should be 3 since writing 3 commands + catch {$client shutdown} + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.2.base.rdb seq 2 type b} + {file appendonly.aof.2.incr.aof seq 2 type i startoffset 2 endoffset 3} + } + } + + clean_aof_persistence $aof_dirpath + } + + test {INCR AOF has accurate start offset when AOFRW} { + start_server [list overrides [list dir $server_path appendonly yes ]] { + r config set auto-aof-rewrite-percentage 0 + + # Start write load to let the master_repl_offset continue increasing + # since appendonly is enabled + set load_handle0 [start_write_load [srv 0 host] [srv 0 port] 10] + wait_for_condition 50 100 { + [r dbsize] > 0 + } else { + fail "No write load detected." 
+ } + + # We obtain the master_repl_offset at the time of bgrewriteaof by pausing + # the redis process, sending pipeline commands, and then resuming the process + set rd [redis_deferring_client] + pause_process [srv 0 pid] + set buf "info replication\r\n" + append buf "bgrewriteaof\r\n" + $rd write $buf + $rd flush + resume_process [srv 0 pid] + # Read the replication offset and the start of the bgrewriteaof + regexp {master_repl_offset:(\d+)} [$rd read] -> offset1 + assert_match {*rewriting started*} [$rd read] + $rd close + + # Get the start offset from the manifest file after bgrewriteaof + waitForBgrewriteaof r + set fp [open $aof_manifest_file r] + set content [read $fp] + close $fp + set offset2 [lindex [regexp -inline {startoffset (\d+)} $content] 1] + + # The start offset of INCR AOF should be the same as master_repl_offset + # when we trigger bgrewriteaof + assert {$offset1 == $offset2} + stop_write_load $load_handle0 + wait_load_handlers_disconnected + } + } } diff --git a/tests/support/aofmanifest.tcl b/tests/support/aofmanifest.tcl index 151626294f..68eed037b5 100644 --- a/tests/support/aofmanifest.tcl +++ b/tests/support/aofmanifest.tcl @@ -122,7 +122,7 @@ proc assert_aof_manifest_content {manifest_path content} { assert_equal [llength $lines] [llength $content] for { set i 0 } { $i < [llength $lines] } {incr i} { - assert_equal [lindex $lines $i] [lindex $content $i] + assert {[string first [lindex $content $i] [lindex $lines $i]] != -1} } }