]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Mitigate regression bug of options.max_successive_merges hit during DB Recovery
authorsdong <siying.d@fb.com>
Thu, 25 Aug 2016 21:43:12 +0000 (14:43 -0700)
committersdong <siying.d@fb.com>
Tue, 30 Aug 2016 18:24:55 +0000 (11:24 -0700)
Summary:
After 1b8a2e8fdd1db0dac3cb50228065f8e7e43095f0, the DB pointer is passed to WriteBatchInternal::InsertInto() during DB recovery. This can cause a deadlock if the options.max_successive_merges limit is hit: in that case DB::Get() will be called, and Get() will try to acquire the DB mutex, which is already held by DB::Open(), causing a deadlock.

This commit mitigates the problem by not passing the DB pointer unless 2PC is allowed.

Test Plan: Add a new test and run it.

Reviewers: IslamAbdelRahman, andrewkr, kradhakrishnan, horuff

Reviewed By: kradhakrishnan

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D62625

db/db_impl.cc
db/db_test2.cc

index 626b9756ea4f818310a0480629dc8c31a858e5f5..126c3e943e05613e04a472dc18c104c289e2dc5a 100644 (file)
@@ -1576,10 +1576,16 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
       // we just ignore the update.
       // That's why we set ignore missing column families to true
       bool has_valid_writes = false;
+      // If we pass DB through and options.max_successive_merges is hit
+      // during recovery, Get() will be issued which will try to acquire
+      // DB mutex and cause deadlock, as DB mutex is already held.
+      // The DB pointer is not needed unless 2PC is used.
+      // TODO(sdong) fix the allow_2pc case too.
       status = WriteBatchInternal::InsertInto(
           &batch, column_family_memtables_.get(), &flush_scheduler_, true,
-          log_number, this, false /* concurrent_memtable_writes */,
-          next_sequence, &has_valid_writes);
+          log_number, db_options_.allow_2pc ? this : nullptr,
+          false /* concurrent_memtable_writes */, next_sequence,
+          &has_valid_writes);
       // If it is the first log file and there is no column family updated
       // after replaying the file, this file may be a stale file. We ignore
       // sequence IDs from the file. Otherwise, if a newer stale log file that
index 147715a7d0aa527cf566f1d1cc04cfeed663c9f5..b4e989389ff07395f18a10cf63d1043a12beeca2 100644 (file)
@@ -1816,6 +1816,22 @@ TEST_P(MergeOperatorPinningTest, EvictCacheBeforeMerge) {
 }
 #endif  // ROCKSDB_LITE
 
+TEST_F(DBTest2, MaxSuccessiveMergesInRecovery) {
+  Options options;
+  options = CurrentOptions(options);
+  options.merge_operator = MergeOperators::CreatePutOperator();
+  DestroyAndReopen(options);
+
+  db_->Put(WriteOptions(), "foo", "bar");
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+
+  options.max_successive_merges = 3;
+  Reopen(options);
+}
 }  // namespace rocksdb
 
 int main(int argc, char** argv) {