]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
fix hanging after CompactFiles with L0 overlap
authorAndrew Kryczka <andrewkr@fb.com>
Wed, 13 Sep 2017 22:31:34 +0000 (15:31 -0700)
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>
Wed, 13 Sep 2017 22:41:38 +0000 (15:41 -0700)
Summary:
Bug report: https://www.facebook.com/groups/rocksdb.dev/permalink/1389452781153232/

Non-empty `level0_compactions_in_progress_` was aborting `CompactFiles` after incrementing `bg_compaction_scheduled_`, and in that case we never decremented it. This blocked future compactions and prevented DB close as we wait for scheduled compactions to finish/abort during close.

I eliminated `CompactFiles`'s dependency on `level0_compactions_in_progress_`. Since it takes a contiguous span of L0 files -- through the last L0 file if any L1+ files are included -- it's fine to run in parallel with other compactions involving L0. We make the same assumption in intra-L0 compaction.
Closes https://github.com/facebook/rocksdb/pull/2849

Differential Revision: D5780440

Pulled By: ajkr

fbshipit-source-id: 15b15d3faf5a699aed4b82a58352d4a7bb23e027

db/compaction_picker.cc
db/compaction_picker.h
db/db_compaction_test.cc
db/db_impl_compaction_flush.cc

index 961231e947f5c863a8f774c00ea5e2677dd17b0a..a3fafe67feaaa214679bfe2dc292c3a46546797c 100644 (file)
@@ -292,25 +292,16 @@ Compaction* CompactionPicker::CompactFiles(
     VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options,
     uint32_t output_path_id) {
   assert(input_files.size());
+  // This compaction output should not overlap with a running compaction as
+  // `SanitizeCompactionInputFiles` should've checked earlier and db mutex
+  // shouldn't have been released since.
+  assert(!FilesRangeOverlapWithCompaction(input_files, output_level));
 
-  // TODO(rven ): we might be able to run concurrent level 0 compaction
-  // if the key ranges of the two compactions do not overlap, but for now
-  // we do not allow it.
-  if ((input_files[0].level == 0) && !level0_compactions_in_progress_.empty()) {
-    return nullptr;
-  }
-  // This compaction output could overlap with a running compaction
-  if (FilesRangeOverlapWithCompaction(input_files, output_level)) {
-    return nullptr;
-  }
   auto c =
       new Compaction(vstorage, ioptions_, mutable_cf_options, input_files,
                      output_level, compact_options.output_file_size_limit,
                      mutable_cf_options.max_compaction_bytes, output_path_id,
                      compact_options.compression, /* grandparents */ {}, true);
-
-  // If it's level 0 compaction, make sure we don't execute any other level 0
-  // compactions in parallel
   RegisterCompaction(c);
   return c;
 }
index f44139c2dd91938bcbae737d9586c84405002579..3172a68e85f38767f869aff6e865455c930860ef 100644 (file)
@@ -88,6 +88,10 @@ class CompactionPicker {
 
   // Takes a list of CompactionInputFiles and returns a (manual) Compaction
   // object.
+  //
+  // Caller must provide a set of input files that has been passed through
+  // `SanitizeCompactionInputFiles` earlier. The lock should not be released
+  // between that call and this one.
   Compaction* CompactFiles(const CompactionOptions& compact_options,
                            const std::vector<CompactionInputFiles>& input_files,
                            int output_level, VersionStorageInfo* vstorage,
index 64028768dedbf9e8dcf8aa70eff1f593d84064dc..594f239885376119191512899b7ac747650fec1c 100644 (file)
@@ -2724,6 +2724,55 @@ TEST_F(DBCompactionTest, OptimizedDeletionObsoleting) {
             options.statistics->getTickerCount(COMPACTION_KEY_DROP_OBSOLETE));
 }
 
+TEST_F(DBCompactionTest, CompactFilesPendingL0Bug) {
+  // https://www.facebook.com/groups/rocksdb.dev/permalink/1389452781153232/
+  // CompactFiles() had a bug where it failed to pick a compaction when an L0
+  // compaction existed, but marked it as scheduled anyways. It'd never be
+  // unmarked as scheduled, so future compactions or DB close could hang.
+  const int kNumL0Files = 5;
+  Options options = CurrentOptions();
+  options.level0_file_num_compaction_trigger = kNumL0Files - 1;
+  options.max_background_compactions = 2;
+  DestroyAndReopen(options);
+
+  rocksdb::SyncPoint::GetInstance()->LoadDependency(
+      {{"LevelCompactionPicker::PickCompaction:Return",
+        "DBCompactionTest::CompactFilesPendingL0Bug:Picked"},
+       {"DBCompactionTest::CompactFilesPendingL0Bug:ManualCompacted",
+        "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}});
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  auto schedule_multi_compaction_token =
+      dbfull()->TEST_write_controler().GetCompactionPressureToken();
+
+  // Files 0-3 will be included in an L0->L1 compaction.
+  //
+  // File 4 will be included in a call to CompactFiles() while the first
+  // compaction is running.
+  for (int i = 0; i < kNumL0Files - 1; ++i) {
+    ASSERT_OK(Put(Key(0), "val"));  // sentinel to prevent trivial move
+    ASSERT_OK(Put(Key(i + 1), "val"));
+    ASSERT_OK(Flush());
+  }
+  TEST_SYNC_POINT("DBCompactionTest::CompactFilesPendingL0Bug:Picked");
+  // file 4 flushed after 0-3 picked
+  ASSERT_OK(Put(Key(kNumL0Files), "val"));
+  ASSERT_OK(Flush());
+
+  // previously DB close would hang forever as this situation caused scheduled
+  // compactions count to never decrement to zero.
+  ColumnFamilyMetaData cf_meta;
+  dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta);
+  ASSERT_EQ(kNumL0Files, cf_meta.levels[0].files.size());
+  std::vector<std::string> input_filenames;
+  input_filenames.push_back(cf_meta.levels[0].files.front().name);
+  ASSERT_OK(dbfull()
+                  ->CompactFiles(CompactionOptions(), input_filenames,
+                                 0 /* output_level */));
+  TEST_SYNC_POINT("DBCompactionTest::CompactFilesPendingL0Bug:ManualCompacted");
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+}
+
 TEST_F(DBCompactionTest, CompactFilesOverlapInL0Bug) {
   // Regression test for bug of not pulling in L0 files that overlap the user-
   // specified input files in time- and key-ranges.
index 18e2e88da9fe5eb0fa0f8225e856bb8f03c0e9be..277aee6edf6e2249174f0300a43a8ea46ec9447b 100644 (file)
@@ -515,9 +515,10 @@ Status DBImpl::CompactFilesImpl(
   c.reset(cfd->compaction_picker()->CompactFiles(
       compact_options, input_files, output_level, version->storage_info(),
       *cfd->GetLatestMutableCFOptions(), output_path_id));
-  if (!c) {
-    return Status::Aborted("Another Level 0 compaction is running");
-  }
+  // we already sanitized the set of input files and checked for conflicts
+  // without releasing the lock, so we're guaranteed a compaction can be formed.
+  assert(c != nullptr);
+
   c->SetInputVersion(version);
   // deletion compaction currently not allowed in CompactFiles.
   assert(!c->deletion_compaction());