From 39c6c5fc1bb2db63c4f9e5d095ea1209009dc3d8 Mon Sep 17 00:00:00 2001 From: Sergei Glushchenko Date: Sat, 6 Apr 2019 06:36:42 -0700 Subject: [PATCH] Expose DB methods to lock and unlock the WAL (#5146) Summary: Expose DB methods to lock and unlock the WAL. These methods are intended to be used by MyRocks in order to obtain WAL coordinates in a consistent way. The usage scenario is as follows: MySQL has performance_schema.log_status which provides information that enables a backup tool to copy the required log files without locking for the duration of the copy. To populate this table MySQL does the following: 1. Lock the binary log. Transactions are not allowed to commit now. 2. Save the binary log coordinates. 3. Walk through the storage engines and lock writes on each engine. For InnoDB, the redo log is locked. For MyRocks, the WAL should be locked. 4. Ask storage engines for their coordinates. InnoDB reports its current LSN and checkpoint LSN. MyRocks should report active WAL file names and sizes. 5. Release the storage engines' locks. 6. Unlock the binary log. The backup tool will then use this information to copy InnoDB, RocksDB and MySQL binary logs up to the specified positions to end up with a consistent DB state after restore. Currently, RocksDB allows obtaining the list of WAL files. The only missing bit is a method to lock the writes to WAL files. The LockWAL method must flush the WAL in order for the reported size to be accurate (GetSortedWALFiles uses a file system stat call to return the file size). Also, since the backup tool is going to copy the WAL, it is better for it to be flushed. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5146 Differential Revision: D14815447 Pulled By: maysamyabandeh fbshipit-source-id: eec9535a6025229ed471119f19fe7b3d8ae888a3 --- db/db_impl.cc | 19 +++++++++++++++++++ db/db_impl.h | 4 +++- db/db_impl_debug.cc | 12 +++++++++--- db/db_write_test.cc | 19 +++++++++++++++++++ include/rocksdb/db.h | 10 ++++++++++ include/rocksdb/utilities/stackable_db.h | 4 ++++ 6 files changed, 64 insertions(+), 4 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 4eb7091c..8180564c 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1112,6 +1112,25 @@ Status DBImpl::SyncWAL() { return status; } +Status DBImpl::LockWAL() { + log_write_mutex_.Lock(); + auto cur_log_writer = logs_.back().writer; + auto status = cur_log_writer->WriteBuffer(); + if (!status.ok()) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s", + status.ToString().c_str()); + // In case there is a fs error we should set it globally to prevent the + // future writes + WriteStatusCheck(status); + } + return status; +} + +Status DBImpl::UnlockWAL() { + log_write_mutex_.Unlock(); + return Status::OK(); +} + void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, const Status& status) { mutex_.AssertHeld(); diff --git a/db/db_impl.h b/db/db_impl.h index 5af6e2bf..e834e0fb 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -234,8 +234,10 @@ class DBImpl : public DB { const FlushOptions& options, const std::vector& column_families) override; virtual Status FlushWAL(bool sync) override; - bool TEST_WALBufferIsEmpty(); + bool TEST_WALBufferIsEmpty(bool lock = true); virtual Status SyncWAL() override; + virtual Status LockWAL() override; + virtual Status UnlockWAL() override; virtual SequenceNumber GetLatestSequenceNumber() const override; virtual SequenceNumber GetLastPublishedSequence() const { diff --git a/db/db_impl_debug.cc b/db/db_impl_debug.cc index 2f99e7d0..98222714 100644 --- a/db/db_impl_debug.cc +++ b/db/db_impl_debug.cc 
@@ -26,10 +26,16 @@ void DBImpl::TEST_SwitchWAL() { SwitchWAL(&write_context); } -bool DBImpl::TEST_WALBufferIsEmpty() { - InstrumentedMutexLock wl(&log_write_mutex_); +bool DBImpl::TEST_WALBufferIsEmpty(bool lock) { + if (lock) { + log_write_mutex_.Lock(); + } log::Writer* cur_log_writer = logs_.back().writer; - return cur_log_writer->TEST_BufferIsEmpty(); + auto res = cur_log_writer->TEST_BufferIsEmpty(); + if (lock) { + log_write_mutex_.Unlock(); + } + return res; } int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes( diff --git a/db/db_write_test.cc b/db/db_write_test.cc index 3208f34b..e6bab875 100644 --- a/db/db_write_test.cc +++ b/db/db_write_test.cc @@ -166,6 +166,25 @@ TEST_P(DBWriteTest, IOErrorOnSwitchMemtable) { Close(); } +// Test that db->LockWAL() flushes the WAL after locking. +TEST_P(DBWriteTest, LockWalInEffect) { + Options options = GetOptions(); + Reopen(options); + // try the 1st WAL created during open + ASSERT_OK(Put("key" + ToString(0), "value")); + ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); + ASSERT_OK(dbfull()->LockWAL()); + ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty(false)); + ASSERT_OK(dbfull()->UnlockWAL()); + // try the 2nd wal created during SwitchWAL + dbfull()->TEST_SwitchWAL(); + ASSERT_OK(Put("key" + ToString(0), "value")); + ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); + ASSERT_OK(dbfull()->LockWAL()); + ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty(false)); + ASSERT_OK(dbfull()->UnlockWAL()); +} + INSTANTIATE_TEST_CASE_P(DBWriteTestInstance, DBWriteTest, testing::Values(DBTestBase::kDefault, DBTestBase::kConcurrentWALWrites, diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 7e2556f7..b40af20e 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -986,6 +986,16 @@ class DB { // Currently only works if allow_mmap_writes = false in Options. virtual Status SyncWAL() = 0; + // Lock the WAL. Also flushes the WAL after locking. 
+ virtual Status LockWAL() { + return Status::NotSupported("LockWAL not implemented"); + } + + // Unlock the WAL. + virtual Status UnlockWAL() { + return Status::NotSupported("UnlockWAL not implemented"); + } + // The sequence number of the most recent transaction. virtual SequenceNumber GetLatestSequenceNumber() const = 0; diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index aac0745f..8fef9b3e 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -281,6 +281,10 @@ class StackableDB : public DB { virtual Status FlushWAL(bool sync) override { return db_->FlushWAL(sync); } + virtual Status LockWAL() override { return db_->LockWAL(); } + + virtual Status UnlockWAL() override { return db_->UnlockWAL(); } + #ifndef ROCKSDB_LITE virtual Status DisableFileDeletions() override { -- 2.47.3