git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Encode min_log_number_to_keep and delete_wals_before in one version edit (#9766)
author Yanqin Jin <yanqin@fb.com>
Fri, 1 Apr 2022 03:00:52 +0000 (20:00 -0700)
committer Andrew Kryczka <andrewkr@fb.com>
Mon, 18 Apr 2022 17:43:05 +0000 (10:43 -0700)
Summary:
min_log_number_to_keep denotes that the WALs whose numbers are below
this value **will** be deleted by RocksDB.
delete_wals_before will be used by RocksDB if
track_and_verify_wals_in_manifest is set to true. During recovery,
RocksDB uses the info encoded in delete_wals_before to reconstruct its
knowledge of which WALs it should expect to exist.
If these two tags are not encoded in the same VersionEdit, then it's
possible for min_log_number_to_keep=100 to exist in the MANIFEST, but for
delete_wals_before=100 to be lost due to a power failure. The subsequent
recovery will delete 99.log. If the db crashes again, the following
recovery will expect to see 99.log since there is no
delete_wals_before=100 in the MANIFEST, but the WAL is already deleted.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9766

Test Plan:
First of all, make check.
Second, format compatibility.
SHORT_TEST=1 ./tools/check_format_compatible.sh

Reviewed By: ltamasi

Differential Revision: D35203623

Pulled By: riversand963

fbshipit-source-id: 45623fc4b4b50d299d5e0f9559a3a4c5e9522c8f

HISTORY.md
db/db_flush_test.cc
db/memtable_list.cc
db/version_edit.cc

index 7b9368704512c6c39a921ae5b83d693729e78831..ae7d7a2be6ebc43b398fb043ea267f9547ff7c2f 100644 (file)
@@ -6,6 +6,7 @@
 * Fixed a heap use-after-free race with DropColumnFamily.
 * Fixed a bug that `rocksdb.read.block.compaction.micros` cannot track compaction stats (#9722).
 * Fixed `file_type`, `relative_filename` and `directory` fields returned by `GetLiveFilesMetaData()`, which were added in inheriting from `FileStorageInfo`.
+* Fixed a bug affecting `track_and_verify_wals_in_manifest`. Without the fix, application may see "open error: Corruption: Missing WAL with log number" while trying to open the db. The corruption is a false alarm but prevents DB open (#9766).
 
 ## 7.1.1 (04/07/2022)
 ### Bug Fixes
index a8ffb9ec5b51fd4fe28fe69b17ff3ddaf3dc57c5..76442086d6090322b71f31525cafdc8fe39b70ab 100644 (file)
@@ -2271,7 +2271,7 @@ TEST_P(DBAtomicFlushTest, ManualFlushUnder2PC) {
 
   // The recovered min log number with prepared data should be non-zero.
   // In 2pc mode, MinLogNumberToKeep returns the
-  // VersionSet::min_log_number_to_keep_2pc recovered from MANIFEST, if it's 0,
+  // VersionSet::min_log_number_to_keep recovered from MANIFEST, if it's 0,
   // it means atomic flush didn't write the min_log_number_to_keep to MANIFEST.
   cfs.push_back(kDefaultColumnFamilyName);
   ASSERT_OK(TryReopenWithColumnFamilies(cfs, options));
index b0d29bcd2411dc9dd467fff03971aa8f35e4984a..f447ee7353338622a7cc2795973d8c154a38e218 100644 (file)
@@ -505,21 +505,20 @@ Status MemTableList::TryInstallMemtableFlushResults(
         min_wal_number_to_keep =
             PrecomputeMinLogNumberToKeepNon2PC(vset, *cfd, edit_list);
       }
-      edit_list.back()->SetMinLogNumberToKeep(min_wal_number_to_keep);
 
-      std::unique_ptr<VersionEdit> wal_deletion;
+      VersionEdit wal_deletion;
+      wal_deletion.SetMinLogNumberToKeep(min_wal_number_to_keep);
       if (vset->db_options()->track_and_verify_wals_in_manifest) {
         if (min_wal_number_to_keep >
             vset->GetWalSet().GetMinWalNumberToKeep()) {
-          wal_deletion.reset(new VersionEdit);
-          wal_deletion->DeleteWalsBefore(min_wal_number_to_keep);
-          edit_list.push_back(wal_deletion.get());
+          wal_deletion.DeleteWalsBefore(min_wal_number_to_keep);
         }
         TEST_SYNC_POINT_CALLBACK(
             "MemTableList::TryInstallMemtableFlushResults:"
             "AfterComputeMinWalToKeep",
             nullptr);
       }
+      edit_list.push_back(&wal_deletion);
 
       const auto manifest_write_cb = [this, cfd, batch_count, log_buffer,
                                       to_delete, mu](const Status& status) {
@@ -805,17 +804,15 @@ Status InstallMemtableAtomicFlushResults(
     min_wal_number_to_keep =
         PrecomputeMinLogNumberToKeepNon2PC(vset, cfds, edit_lists);
   }
-  edit_lists.back().back()->SetMinLogNumberToKeep(min_wal_number_to_keep);
 
-  std::unique_ptr<VersionEdit> wal_deletion;
-  if (vset->db_options()->track_and_verify_wals_in_manifest) {
-    if (min_wal_number_to_keep > vset->GetWalSet().GetMinWalNumberToKeep()) {
-      wal_deletion.reset(new VersionEdit);
-      wal_deletion->DeleteWalsBefore(min_wal_number_to_keep);
-      edit_lists.back().push_back(wal_deletion.get());
-      ++num_entries;
-    }
+  VersionEdit wal_deletion;
+  wal_deletion.SetMinLogNumberToKeep(min_wal_number_to_keep);
+  if (vset->db_options()->track_and_verify_wals_in_manifest &&
+      min_wal_number_to_keep > vset->GetWalSet().GetMinWalNumberToKeep()) {
+    wal_deletion.DeleteWalsBefore(min_wal_number_to_keep);
   }
+  edit_lists.back().push_back(&wal_deletion);
+  ++num_entries;
 
   // Mark the version edits as an atomic group if the number of version edits
   // exceeds 1.
index e7cd0a6bee60df448a0b9a5334e99dda4ad936ae..619f67db03568eb2c33a4d5d13e566e11cab2987 100644 (file)
@@ -120,6 +120,9 @@ bool VersionEdit::EncodeTo(std::string* dst) const {
   if (has_max_column_family_) {
     PutVarint32Varint32(dst, kMaxColumnFamily, max_column_family_);
   }
+  if (has_min_log_number_to_keep_) {
+    PutVarint32Varint64(dst, kMinLogNumberToKeep, min_log_number_to_keep_);
+  }
   if (has_last_sequence_) {
     PutVarint32Varint64(dst, kLastSequence, last_sequence_);
   }