]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Fix DeleteRange file boundary correctness issue with max_compaction_bytes
authorAndrew Kryczka <andrewkr@fb.com>
Wed, 18 Jan 2017 19:47:07 +0000 (11:47 -0800)
committerAndrew Kryczka <andrewkr@fb.com>
Wed, 18 Jan 2017 21:14:19 +0000 (13:14 -0800)
Summary:
CockroachDB exposed this bug in #1778. The bug happens when a compaction's output files are ended due to exceeding max_compaction_bytes. In that case we weren't taking into account the next file's start key when deciding how far to extend the current file's max_key. This caused the non-overlapping key-range invariant to be violated.

Note this was correctly handled for the usual case of cutting compaction output, which is file size exceeding max_output_file_size. I am not sure why these are two separate code paths, but we can consider refactoring them to prevent such errors in the future.
Closes https://github.com/facebook/rocksdb/pull/1784

Differential Revision: D4430235

Pulled By: ajkr

fbshipit-source-id: 80af748

db/compaction_job.cc
db/db_range_del_test.cc

index 4e3a5ecbb225a9adf6db98d18ca849c1e2c07957..70ff863d5c89d3a8208ff578b94db11f2739e278 100644 (file)
@@ -772,9 +772,9 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
                    key, sub_compact->current_output_file_size) &&
                sub_compact->builder != nullptr) {
       CompactionIterationStats range_del_out_stats;
-      status =
-          FinishCompactionOutputFile(input->status(), sub_compact,
-                                     range_del_agg.get(), &range_del_out_stats);
+      status = FinishCompactionOutputFile(input->status(), sub_compact,
+                                          range_del_agg.get(),
+                                          &range_del_out_stats, &key);
       RecordDroppedKeys(range_del_out_stats,
                         &sub_compact->compaction_job_stats);
       if (!status.ok()) {
index 8e3498e125b67780c28fc02a08ec78586f76b110..34fb1586cad3e30de4b993985654d6347e061a87 100644 (file)
@@ -112,6 +112,59 @@ TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
   db_->ReleaseSnapshot(snapshot);
 }
 
+TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) {
+  // Ensures range deletion spanning multiple compaction output files that are
+  // cut by max_compaction_bytes will have non-overlapping key-ranges.
+  // https://github.com/facebook/rocksdb/issues/1778
+  const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12;
+  Options opts = CurrentOptions();
+  opts.comparator = test::Uint64Comparator();
+  opts.disable_auto_compactions = true;
+  opts.level0_file_num_compaction_trigger = kNumFiles;
+  opts.max_compaction_bytes = kNumPerFile * kBytesPerVal;
+  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
+  // Want max_compaction_bytes to trigger the end of compaction output file, not
+  // target_file_size_base, so make the latter much bigger
+  opts.target_file_size_base = 100 * opts.max_compaction_bytes;
+  Reopen(opts);
+
+  // snapshot protects range tombstone from dropping due to becoming obsolete.
+  const Snapshot* snapshot = db_->GetSnapshot();
+
+  // It spans the whole key-range, thus will be included in all output files
+  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
+                             GetNumericStr(0),
+                             GetNumericStr(kNumFiles * kNumPerFile - 1)));
+  Random rnd(301);
+  for (int i = 0; i < kNumFiles; ++i) {
+    std::vector<std::string> values;
+    // Write 1MB (256 values, each 4K)
+    for (int j = 0; j < kNumPerFile; j++) {
+      values.push_back(RandomString(&rnd, kBytesPerVal));
+      ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j]));
+    }
+    // extra entry to trigger SpecialSkipListFactory's flush
+    ASSERT_OK(Put(GetNumericStr(kNumPerFile), ""));
+    dbfull()->TEST_WaitForFlushMemTable();
+    ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
+  }
+
+  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
+                              true /* disallow_trivial_move */);
+  ASSERT_EQ(0, NumTableFilesAtLevel(0));
+  ASSERT_GE(NumTableFilesAtLevel(1), 2);
+
+  std::vector<std::vector<FileMetaData>> files;
+  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
+
+  for (size_t i = 0; i < files[1].size() - 1; ++i) {
+    ASSERT_TRUE(InternalKeyComparator(opts.comparator)
+                    .Compare(files[1][i].largest, files[1][i + 1].smallest) <
+                0);
+  }
+  db_->ReleaseSnapshot(snapshot);
+}
+
 TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) {
   db_->Put(WriteOptions(), "b1", "val");
   ASSERT_OK(