delete db1;
}
+TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) {
+ Options options = CurrentOptions();
+ options.track_and_verify_wals_in_manifest = true;
+ // Set a small max_manifest_file_size to force manifest creation
+ // in SyncWAL() for tet purpose
+ options.max_manifest_file_size = 170;
+ DestroyAndReopen(options);
+
+ // Accumulate memtable m1 and create the 1st wal (i.e, 4.log)
+ ASSERT_OK(Put(Key(1), ""));
+ ASSERT_OK(Put(Key(2), ""));
+ ASSERT_OK(Put(Key(3), ""));
+
+ const std::string wal_file_path = db_->GetName() + "/000004.log";
+
+ // Coerce the following sequence of events:
+ // (1) Flush() marks 4.log to be obsoleted, 8.log to be the latest (i.e,
+ // active) log and release the lock
+ // (2) SyncWAL() proceeds with the lock. It
+ // creates a new manifest and syncs all the inactive wals before the latest
+ // (i.e, active log), which is 4.log. SyncWAL() is not aware of the fact
+ // that 4.log has marked as to be obseleted.
+ // Prior to the fix, such wal sync will then add a WAL addition record of
+ // 4.log to the new manifest without any special treatment.
+ // (3) BackgroundFlush() will eventually purge 4.log.
+ bool sync_point_called = false;
+ bool new_manifest_created = false;
+ SyncPoint::GetInstance()->SetCallBack(
+ "FindObsoleteFiles::PostMutexUnlock", [&](void*) {
+ ASSERT_OK(env_->FileExists(wal_file_path));
+
+ SyncPoint::GetInstance()->SetCallBack(
+ "VersionSet::ProcessManifestWrites:BeforeNewManifest",
+ [&](void*) { new_manifest_created = true; });
+
+ ASSERT_OK(db_->SyncWAL());
+ ASSERT_TRUE(new_manifest_created);
+
+ sync_point_called = true;
+ });
+
+ SyncPoint::GetInstance()->SetCallBack(
+ "DBImpl::DeleteObsoleteFileImpl:AfterDeletion2", [&](void* arg) {
+ std::string* file_name = (std::string*)arg;
+ if (*file_name == wal_file_path) {
+ TEST_SYNC_POINT(
+ "DBWALTest::"
+ "FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
+ "PostDeleteWAL");
+ }
+ });
+ SyncPoint::GetInstance()->LoadDependency(
+ {{"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
+ "PostDeleteWAL",
+ "DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
+ "PreConfrimWALDeleted"}});
+
+ SyncPoint::GetInstance()->EnableProcessing();
+
+ ASSERT_OK(Flush());
+ ASSERT_TRUE(sync_point_called);
+
+ TEST_SYNC_POINT(
+ "DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
+ "PreConfrimWALDeleted");
+
+ // BackgroundFlush() purged 4.log
+ // because the memtable associated with the WAL was flushed and new WAL was
+ // created (i.e, 8.log)
+ ASSERT_TRUE(env_->FileExists(wal_file_path).IsNotFound());
+
+ SyncPoint::GetInstance()->ClearAllCallBacks();
+ SyncPoint::GetInstance()->DisableProcessing();
+
+ // To verify the corruption of "Missing WAL with log number: 4" under
+ // `options.track_and_verify_wals_in_manifest = true` is fixed.
+ //
+ // Before the fix, `db_->SyncWAL()` will sync and record WAL addtion of the
+ // obseleted WAL 4.log in a new manifest without any special treament.
+ // This will result in missing-wal corruption in DB::Reopen().
+ Status s = TryReopen(options);
+ EXPECT_OK(s);
+}
+
// Test scope:
// - We expect to open data store under all circumstances
// - We expect only data upto the point where the first error was encountered
if (!descriptor_log_ ||
manifest_file_size_ > db_options_->max_manifest_file_size) {
TEST_SYNC_POINT("VersionSet::ProcessManifestWrites:BeforeNewManifest");
+ TEST_SYNC_POINT_CALLBACK(
+ "VersionSet::ProcessManifestWrites:BeforeNewManifest", nullptr);
new_descriptor_log = true;
} else {
pending_manifest_file_number_ = manifest_file_number_;
break;
}
}
+
if (s.ok()) {
io_s = SyncManifest(db_options_, descriptor_log_->file());
manifest_io_status = io_s;
Status VersionSet::Recover(
const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
std::string* db_id, bool no_error_if_files_missing) {
- // Read "CURRENT" file, which contains a pointer to the current manifest file
+ // Read "CURRENT" file, which contains a pointer to the current manifest
+ // file
std::string manifest_path;
Status s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path,
&manifest_file_number_);
}
}
+ // New manifest should rollover the WAL deletion record from previous
+ // manifest. Otherwise, when an addition record of a deleted WAL gets added to
+ // this new manifest later (which can happens in e.g, SyncWAL()), this new
+ // manifest creates an illusion that such WAL hasn't been deleted.
+ VersionEdit wal_deletions;
+ wal_deletions.DeleteWalsBefore(min_log_number_to_keep());
+ std::string wal_deletions_record;
+ if (!wal_deletions.EncodeTo(&wal_deletions_record)) {
+ return Status::Corruption("Unable to Encode VersionEdit: " +
+ wal_deletions.DebugString(true));
+ }
+ io_s = log->AddRecord(wal_deletions_record);
+ if (!io_s.ok()) {
+ return io_s;
+ }
+
for (auto cfd : *column_family_set_) {
assert(cfd);