]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Fix missing WAL in new manifest by rolling over the WAL deletion record from prev...
authorHui Xiao <huixiao@fb.com>
Tue, 29 Nov 2022 22:14:43 +0000 (14:14 -0800)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Tue, 29 Nov 2022 22:14:43 +0000 (14:14 -0800)
Summary:
**Context**
`Options::track_and_verify_wals_in_manifest = true` verifies each of the WALs tracked in manifest indeed presents in the WAL folder. If not, a corruption "Missing WAL with log number" will be thrown.

`DB::SyncWAL()` called at a specific timing (i.e, at the `TEST_SYNC_POINT("FindObsoleteFiles::PostMutexUnlock")`) can record in a new manifest the WAL addition of a WAL file that already had a WAL deletion recorded in the previous manifest.
And the WAL deletion record is not rollover-ed to the new manifest. So the new manifest creates the illusion of such WAL never gets deleted and should presents at db re/open.
- Such WAL deletion record can be caused by flushing the memtable associated with that WAL and such WAL deletion can actually happen in` PurgeObsoleteFiles()`.

As a consequence, upon `DB::Reopen()`, this WAL file can be deleted while manifest still has its WAL addition record , which causes a false alarm of corruption "Missing WAL with log number" to be thrown.

**Summary**
This PR fixes this false alarm by rolling over the WAL deletion record from prev manifest to the new manifest by adding the WAL deletion record to the new manifest.

**Test**
- Make check
- Added new unit test `TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL)` that failed before the fix and passed after
- [Ongoing]CI stress test + aggressive value as in https://github.com/facebook/rocksdb/pull/10761 , which is how this false alarm was first surfaced, to confirm such false alarm disappears
- [Ongoing]Regular CI stress test to confirm such fix didn't harm anything

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10892

Reviewed By: ajkr

Differential Revision: D40778965

Pulled By: hx235

fbshipit-source-id: a512364bfdeb0b1a55c171890e60d856c528f37f

HISTORY.md
db/db_impl/db_impl_files.cc
db/db_wal_test.cc
db/version_set.cc
java/src/test/java/org/rocksdb/RocksDBTest.java

index a78861fc2e4d66e0a077255788b797d50764fd4a..4d8a724846e33e290538caf351d6605a99a8515b 100644 (file)
@@ -18,6 +18,7 @@
 * Fixed a regression in scan for async_io. During seek, valid buffers were getting cleared causing a regression.
 * Tiered Storage: fixed excessive keys written to penultimate level in non-debug builds.
 * Fixed a bug that multi-level FIFO compaction deletes one file in non-L0 even when `CompactionOptionsFIFO::max_table_files_size` is no exceeded since #10348 or 7.8.0.
+* Fixed a bug caused by `DB::SyncWAL()` affecting `track_and_verify_wals_in_manifest`. Without the fix, application may see "open error: Corruption: Missing WAL with log number" while trying to open the db. The corruption is a false alarm but prevents DB open (#10892).
 
 ### New Features
 * Add basic support for user-defined timestamp to Merge (#10819).
index 058df4da7e78636d2357124b782a0dc392735dda..9834ff4b27f51d8bac35dc28f9597a330e2dba56 100644 (file)
@@ -315,6 +315,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
     }
     log_write_mutex_.Unlock();
     mutex_.Unlock();
+    TEST_SYNC_POINT_CALLBACK("FindObsoleteFiles::PostMutexUnlock", nullptr);
     log_write_mutex_.Lock();
     while (!logs_.empty() && logs_.front().number < min_log_number) {
       auto& log = logs_.front();
@@ -360,6 +361,8 @@ void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname,
   }
   TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion",
                            &file_deletion_status);
+  TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion2",
+                           const_cast<std::string*>(&fname));
   if (file_deletion_status.ok()) {
     ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
                     "[JOB %d] Delete %s type=%d #%" PRIu64 " -- %s\n", job_id,
index 5b5ec76afa5cfe317acd0f9db5854283d715f8a0..6246cc70f37521097656d75f60308530dbdfb1e0 100644 (file)
@@ -1599,6 +1599,90 @@ TEST_F(DBWALTest, RaceInstallFlushResultsWithWalObsoletion) {
   delete db1;
 }
 
+TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) {
+  Options options = CurrentOptions();
+  options.track_and_verify_wals_in_manifest = true;
+  // Set a small max_manifest_file_size to force manifest creation
+  // in SyncWAL() for tet purpose
+  options.max_manifest_file_size = 170;
+  DestroyAndReopen(options);
+
+  // Accumulate memtable m1 and create the 1st wal (i.e, 4.log)
+  ASSERT_OK(Put(Key(1), ""));
+  ASSERT_OK(Put(Key(2), ""));
+  ASSERT_OK(Put(Key(3), ""));
+
+  const std::string wal_file_path = db_->GetName() + "/000004.log";
+
+  // Coerce the following sequence of events:
+  // (1) Flush() marks 4.log to be obsoleted, 8.log to be the latest (i.e,
+  // active) log and release the lock
+  // (2) SyncWAL() proceeds with the lock. It
+  // creates a new manifest and syncs all the inactive wals before the latest
+  // (i.e, active log), which is 4.log. SyncWAL() is not aware of the fact
+  // that 4.log has marked as to be obseleted.
+  // Prior to the fix, such wal sync will then add a WAL addition record of
+  // 4.log to the new manifest without any special treatment.
+  // (3) BackgroundFlush() will eventually purge 4.log.
+  bool sync_point_called = false;
+  bool new_manifest_created = false;
+  SyncPoint::GetInstance()->SetCallBack(
+      "FindObsoleteFiles::PostMutexUnlock", [&](void*) {
+        ASSERT_OK(env_->FileExists(wal_file_path));
+
+        SyncPoint::GetInstance()->SetCallBack(
+            "VersionSet::ProcessManifestWrites:BeforeNewManifest",
+            [&](void*) { new_manifest_created = true; });
+
+        ASSERT_OK(db_->SyncWAL());
+        ASSERT_TRUE(new_manifest_created);
+
+        sync_point_called = true;
+      });
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::DeleteObsoleteFileImpl:AfterDeletion2", [&](void* arg) {
+        std::string* file_name = (std::string*)arg;
+        if (*file_name == wal_file_path) {
+          TEST_SYNC_POINT(
+              "DBWALTest::"
+              "FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
+              "PostDeleteWAL");
+        }
+      });
+  SyncPoint::GetInstance()->LoadDependency(
+      {{"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
+        "PostDeleteWAL",
+        "DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
+        "PreConfrimWALDeleted"}});
+
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  ASSERT_OK(Flush());
+  ASSERT_TRUE(sync_point_called);
+
+  TEST_SYNC_POINT(
+      "DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
+      "PreConfrimWALDeleted");
+
+  // BackgroundFlush() purged 4.log
+  // because the memtable associated with the WAL was flushed and new WAL was
+  // created (i.e, 8.log)
+  ASSERT_TRUE(env_->FileExists(wal_file_path).IsNotFound());
+
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+  SyncPoint::GetInstance()->DisableProcessing();
+
+  // To verify the corruption of "Missing WAL with log number: 4" under
+  // `options.track_and_verify_wals_in_manifest = true` is fixed.
+  //
+  // Before the fix, `db_->SyncWAL()` will sync and record WAL addtion of the
+  // obseleted WAL 4.log in a new manifest without any special treament.
+  // This will result in missing-wal corruption in DB::Reopen().
+  Status s = TryReopen(options);
+  EXPECT_OK(s);
+}
+
 // Test scope:
 // - We expect to open data store under all circumstances
 // - We expect only data upto the point where the first error was encountered
index 082e55217cfebd5610f0175c7ff99ccf67eb6a6e..3b9df2aec82536828bd81fee8b89f81e704ed60a 100644 (file)
@@ -4967,6 +4967,8 @@ Status VersionSet::ProcessManifestWrites(
   if (!descriptor_log_ ||
       manifest_file_size_ > db_options_->max_manifest_file_size) {
     TEST_SYNC_POINT("VersionSet::ProcessManifestWrites:BeforeNewManifest");
+    TEST_SYNC_POINT_CALLBACK(
+        "VersionSet::ProcessManifestWrites:BeforeNewManifest", nullptr);
     new_descriptor_log = true;
   } else {
     pending_manifest_file_number_ = manifest_file_number_;
@@ -5099,6 +5101,7 @@ Status VersionSet::ProcessManifestWrites(
           break;
         }
       }
+
       if (s.ok()) {
         io_s = SyncManifest(db_options_, descriptor_log_->file());
         manifest_io_status = io_s;
@@ -5506,7 +5509,8 @@ Status VersionSet::GetCurrentManifestPath(const std::string& dbname,
 Status VersionSet::Recover(
     const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
     std::string* db_id, bool no_error_if_files_missing) {
-  // Read "CURRENT" file, which contains a pointer to the current manifest file
+  // Read "CURRENT" file, which contains a pointer to the current manifest
+  // file
   std::string manifest_path;
   Status s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path,
                                     &manifest_file_number_);
@@ -6037,6 +6041,22 @@ Status VersionSet::WriteCurrentStateToManifest(
     }
   }
 
+  // New manifest should rollover the WAL deletion record from previous
+  // manifest. Otherwise, when an addition record of a deleted WAL gets added to
+  // this new manifest later (which can happens in e.g, SyncWAL()), this new
+  // manifest creates an illusion that such WAL hasn't been deleted.
+  VersionEdit wal_deletions;
+  wal_deletions.DeleteWalsBefore(min_log_number_to_keep());
+  std::string wal_deletions_record;
+  if (!wal_deletions.EncodeTo(&wal_deletions_record)) {
+    return Status::Corruption("Unable to Encode VersionEdit: " +
+                              wal_deletions.DebugString(true));
+  }
+  io_s = log->AddRecord(wal_deletions_record);
+  if (!io_s.ok()) {
+    return io_s;
+  }
+
   for (auto cfd : *column_family_set_) {
     assert(cfd);
 
index 422bed40c6d08ea77c39ba0a07e8c977874efd1b..488dbafe802c4c589ba63bff11befb1210454107 100644 (file)
@@ -1425,7 +1425,7 @@ public class RocksDBTest {
       try (final RocksDB db = RocksDB.open(options, dbPath)) {
         final RocksDB.LiveFiles livefiles = db.getLiveFiles(true);
         assertThat(livefiles).isNotNull();
-        assertThat(livefiles.manifestFileSize).isEqualTo(59);
+        assertThat(livefiles.manifestFileSize).isEqualTo(66);
         assertThat(livefiles.files.size()).isEqualTo(3);
         assertThat(livefiles.files.get(0)).isEqualTo("/CURRENT");
         assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000005");