]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Reduce risk of backup or checkpoint missing a WAL file (#10083)
authorPeter Dillinger <peterd@fb.com>
Wed, 1 Jun 2022 18:02:27 +0000 (11:02 -0700)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Wed, 1 Jun 2022 18:02:27 +0000 (11:02 -0700)
Summary:
We recently saw a case in crash test in which a WAL file in the
middle of the list of live WALs was not included in the backup, so the
DB was not openable due to missing WAL. We are not sure why, but this
change should at least turn that into a backup-time failure by ensuring
all the WAL files expected by the manifest (according to VersionSet) are
included in `GetSortedWalFiles()` (used by `GetLiveFilesStorageInfo()`,
`BackupEngine`, and `Checkpoint`)

Related: to maximize the effectiveness of
track_and_verify_wals_in_manifest with GetSortedWalFiles() during
checkpoint/backup, we will now sync WAL in GetLiveFilesStorageInfo()
when track_and_verify_wals_in_manifest=true.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10083

Test Plan: added new unit test for the check in GetSortedWalFiles()

Reviewed By: ajkr

Differential Revision: D36791608

Pulled By: pdillinger

fbshipit-source-id: a27bcf0213fc7ab177760fede50d4375d579afa6

HISTORY.md
db/db_filesnapshot.cc
db/db_wal_test.cc

index 762e57f05f7bc481a86c7a47bcb1ee54e764fd7c..51af876ea13c92aae46369c51b98b03771423066 100644 (file)
@@ -6,13 +6,14 @@
 * Add two-phase commit support to C API.
 * Add `rocksdb_transaction_get_writebatch_wi` and `rocksdb_transaction_rebuild_from_writebatch` to C API.
 * Add SingleDelete for DB in C API
-* Add User Defined Timestamp in C API. 
+* Add User Defined Timestamp in C API.
   * `rocksdb_comparator_with_ts_create` to create timestamp aware comparator
   * Put, Get, Delete, SingleDelete, MultiGet APIs has corresponding timestamp aware APIs with suffix `with_ts`
   * And Add C API's for Transaction, SstFileWriter, Compaction as mentioned [here](https://github.com/facebook/rocksdb/wiki/User-defined-Timestamp-(Experimental))
 
 ### New Features
 * Add FileSystem::ReadAsync API in io_tracing
+* Add an extra sanity check in `GetSortedWalFiles()` (also used by `GetLiveFilesStorageInfo()`, `BackupEngine`, and `Checkpoint`) to reduce risk of successfully created backup or checkpoint failing to open because of missing WAL file.
 
 ### Behavior changes
 * DB::Open(), DB::OpenAsSecondary() will fail if a Logger cannot be created (#9984)
index e30071341ad450723f1482ae96218457bdc06f27..515abb72890c7d31701b31a73f0d079b1cd93876 100644 (file)
@@ -124,6 +124,9 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
 }
 
 Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
+  // Record tracked WALs as a (minimum) cross-check for directory scan
+  std::vector<uint64_t> required_by_manifest;
+
   // If caller disabled deletions, this function should return files that are
   // guaranteed not to be deleted until deletions are re-enabled. We need to
   // wait for pending purges to finish since WalManager doesn't know which
@@ -137,6 +140,13 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
     while (pending_purge_obsolete_files_ > 0 || bg_purge_scheduled_ > 0) {
       bg_cv_.Wait();
     }
+
+    // Record tracked WALs as a (minimum) cross-check for directory scan
+    const auto& manifest_wals = versions_->GetWalSet().GetWals();
+    required_by_manifest.reserve(manifest_wals.size());
+    for (const auto& wal : manifest_wals) {
+      required_by_manifest.push_back(wal.first);
+    }
   }
 
   Status s = wal_manager_.GetSortedWalFiles(files);
@@ -150,6 +160,29 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
     assert(deletions_disabled.IsNotSupported());
   }
 
+  if (s.ok()) {
+    // Verify includes those required by manifest (one sorted list is superset
+    // of the other)
+    auto required = required_by_manifest.begin();
+    auto included = files.begin();
+
+    while (required != required_by_manifest.end()) {
+      if (included == files.end() || *required < (*included)->LogNumber()) {
+        // FAIL - did not find
+        return Status::Corruption(
+            "WAL file " + std::to_string(*required) +
+            " required by manifest but not in directory list");
+      }
+      if (*required == (*included)->LogNumber()) {
+        ++required;
+        ++included;
+      } else {
+        assert(*required > (*included)->LogNumber());
+        ++included;
+      }
+    }
+  }
+
   return s;
 }
 
@@ -349,7 +382,13 @@ Status DBImpl::GetLiveFilesStorageInfo(
   TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:SavedLiveFiles2");
 
   if (s.ok()) {
-    s = FlushWAL(false /* sync */);
+    // To maximize the effectiveness of track_and_verify_wals_in_manifest,
+    // sync WAL when it is enabled.
+    s = FlushWAL(
+        immutable_db_options_.track_and_verify_wals_in_manifest /* sync */);
+    if (s.IsNotSupported()) {  // read-only DB or similar
+      s = Status::OK();
+    }
   }
 
   TEST_SYNC_POINT("CheckpointImpl::CreateCustomCheckpoint:AfterGetLive1");
index 10668b9f975d5dc55505068aa8eb4379344d0537..19c1b334f7854bde05c787cfef3e98ff73975caa 100644 (file)
@@ -2165,6 +2165,59 @@ TEST_F(DBWALTest, ReadOnlyRecoveryNoTruncate) {
 #endif  // ROCKSDB_FALLOCATE_PRESENT
 #endif  // ROCKSDB_PLATFORM_POSIX
 
+TEST_F(DBWALTest, WalInManifestButNotInSortedWals) {
+  Options options = CurrentOptions();
+  options.track_and_verify_wals_in_manifest = true;
+  options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
+
+  // Build a way to make wal files selectively go missing
+  bool wals_go_missing = false;
+  struct MissingWalFs : public FileSystemWrapper {
+    MissingWalFs(const std::shared_ptr<FileSystem>& t,
+                 bool* _wals_go_missing_flag)
+        : FileSystemWrapper(t), wals_go_missing_flag(_wals_go_missing_flag) {}
+    bool* wals_go_missing_flag;
+    IOStatus GetChildren(const std::string& dir, const IOOptions& io_opts,
+                         std::vector<std::string>* r,
+                         IODebugContext* dbg) override {
+      IOStatus s = target_->GetChildren(dir, io_opts, r, dbg);
+      if (s.ok() && *wals_go_missing_flag) {
+        for (size_t i = 0; i < r->size();) {
+          if (EndsWith(r->at(i), ".log")) {
+            r->erase(r->begin() + i);
+          } else {
+            ++i;
+          }
+        }
+      }
+      return s;
+    }
+    const char* Name() const override { return "MissingWalFs"; }
+  };
+  auto my_fs =
+      std::make_shared<MissingWalFs>(env_->GetFileSystem(), &wals_go_missing);
+  std::unique_ptr<Env> my_env(NewCompositeEnv(my_fs));
+  options.env = my_env.get();
+
+  CreateAndReopenWithCF({"blah"}, options);
+
+  // Currently necessary to get a WAL tracked in manifest; see
+  // https://github.com/facebook/rocksdb/issues/10080
+  ASSERT_OK(Put(0, "x", "y"));
+  ASSERT_OK(db_->SyncWAL());
+  ASSERT_OK(Put(1, "x", "y"));
+  ASSERT_OK(db_->SyncWAL());
+  ASSERT_OK(Flush(1));
+
+  ASSERT_FALSE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty());
+  std::vector<std::unique_ptr<LogFile>> wals;
+  ASSERT_OK(db_->GetSortedWalFiles(wals));
+  wals_go_missing = true;
+  ASSERT_NOK(db_->GetSortedWalFiles(wals));
+  wals_go_missing = false;
+  Close();
+}
+
 #endif  // ROCKSDB_LITE
 
 TEST_F(DBWALTest, WalTermTest) {