Summary:
When a CompactRange() call for a level is truncated before the end key is
reached because it exceeds max_compaction_bytes, we need to properly set
the compaction_end parameter to indicate the stop key. The next
CompactRange call will then use that key as its begin key. We set it to
the smallest key of the next file in the level after expanding the inputs
to get a clean cut.
Previously, we were setting it before expanding inputs. So we could end
up recompacting some files. In a pathological case, where a single key
has many entries spanning all the files in the level (possibly due to
merge operands without a partial merge operator, thus resulting in
compaction output identical to the input), this would result in
an endless loop over the same set of files.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4496
Differential Revision: D10395026
Pulled By: anand1976
fbshipit-source-id: f0c2f89fee29b4b3be53b6467b53abba8e9146a9
## Unreleased
### Bug Fixes
* Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds.
+* Properly set the stop key for a truncated manual CompactRange, so the next call does not re-pick the same files.
## 5.16.4 (10/10/2018)
### Bug Fixes
bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
VersionStorageInfo* vstorage,
- CompactionInputFiles* inputs) {
+ CompactionInputFiles* inputs,
+ InternalKey** next_smallest) {
// This isn't good compaction
assert(!inputs->empty());
GetRange(*inputs, &smallest, &largest);
inputs->clear();
vstorage->GetOverlappingInputs(level, &smallest, &largest, &inputs->files,
- hint_index, &hint_index);
+ hint_index, &hint_index, true,
+ next_smallest);
} while (inputs->size() > old_size);
// we started off with inputs non-empty and the previous loop only grew
uint64_t s = inputs[i]->compensated_file_size;
total += s;
if (total >= limit) {
- **compaction_end = inputs[i + 1]->smallest;
covering_the_whole_range = false;
inputs.files.resize(i + 1);
break;
}
assert(output_path_id < static_cast<uint32_t>(ioptions_.cf_paths.size()));
- if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs) == false) {
+ InternalKey key_storage;
+ InternalKey* next_smallest = &key_storage;
+ if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs, &next_smallest) ==
+ false) {
// manual compaction is now multi-threaded, so it can
// happen that ExpandWhileOverlapping fails
// we handle it higher in RunManualCompaction
return nullptr;
}
- if (covering_the_whole_range) {
+ if (covering_the_whole_range || !next_smallest) {
*compaction_end = nullptr;
+ } else {
+ **compaction_end = *next_smallest;
}
CompactionInputFiles output_level_inputs;
// Will return false if it is impossible to apply this compaction.
bool ExpandInputsToCleanCut(const std::string& cf_name,
VersionStorageInfo* vstorage,
- CompactionInputFiles* inputs);
+ CompactionInputFiles* inputs,
+ InternalKey** next_smallest = nullptr);
// Returns true if any one of the parent files are being compacted
bool IsRangeInCompaction(VersionStorageInfo* vstorage,
CompactionPri::kOldestSmallestSeqFirst,
CompactionPri::kMinOverlappingRatio));
+// Merge operator that ignores all operands and always produces the fixed
+// value "bar". Because it provides no partial merge, operands are never
+// combined during compaction, so compaction output stays (size-wise)
+// essentially identical to its input — the pathological shape that
+// previously made a truncated manual CompactRange loop forever over the
+// same set of files.
+class NoopMergeOperator : public MergeOperator {
+ public:
+  NoopMergeOperator() {}
+
+  // Full merge: discard all inputs and return the constant "bar".
+  virtual bool FullMergeV2(const MergeOperationInput& /*merge_in*/,
+                           MergeOperationOutput* merge_out) const override {
+    std::string val("bar");
+    merge_out->new_value = val;
+    return true;
+  }
+
+  virtual const char* Name() const override { return "Noop"; }
+};
+
+// Regression test for an endless compaction loop: many merge entries for a
+// single key spanning all files in a level (no partial-merge operator, so
+// output equals input) combined with a max_compaction_bytes small enough to
+// truncate the manual compaction. Before the fix, compaction_end was set
+// before expanding inputs, so each CompactRange re-picked the same files.
+// The test passes simply by CompactRange() terminating.
+TEST_F(DBCompactionTest, PartialManualCompaction) {
+  Options opts = CurrentOptions();
+  opts.num_levels = 3;
+  opts.level0_file_num_compaction_trigger = 10;
+  opts.compression = kNoCompression;
+  opts.merge_operator.reset(new NoopMergeOperator());
+  opts.target_file_size_base = 10240;
+  DestroyAndReopen(opts);
+
+  // Write 8 files, each holding 10 ~1KB merge operands for the same key.
+  Random rnd(301);
+  for (auto i = 0; i < 8; ++i) {
+    for (auto j = 0; j < 10; ++j) {
+      Merge("foo", RandomString(&rnd, 1024));
+    }
+    Flush();
+  }
+
+  MoveFilesToLevel(2);
+
+  // Cap max_compaction_bytes at half the live SST size so the manual
+  // compaction is guaranteed to be truncated partway through the level.
+  // NOTE(review): atoi() narrows the uint64 property to int — fine for the
+  // ~80KB this test writes, but verify if the data size is ever increased.
+  std::string prop;
+  EXPECT_TRUE(dbfull()->GetProperty(DB::Properties::kLiveSstFilesSize, &prop));
+  uint64_t max_compaction_bytes = atoi(prop.c_str()) / 2;
+  ASSERT_OK(dbfull()->SetOptions(
+      {{"max_compaction_bytes", std::to_string(max_compaction_bytes)}}));
+
+  // Force a rewrite of the bottommost level; hangs forever without the fix.
+  CompactRangeOptions cro;
+  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
+  dbfull()->CompactRange(cro, nullptr, nullptr);
+}
+
#endif // !defined(ROCKSDB_LITE)
} // namespace rocksdb
void VersionStorageInfo::GetOverlappingInputs(
int level, const InternalKey* begin, const InternalKey* end,
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
- bool expand_range) const {
+ bool expand_range, InternalKey** next_smallest) const {
if (level >= num_non_empty_levels_) {
// this level is empty, no overlapping inputs
return;
}
const Comparator* user_cmp = user_comparator_;
if (level > 0) {
- GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
- hint_index, file_index);
+ GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index,
+ file_index, false, next_smallest);
return;
}
+ if (next_smallest) {
+ // next_smallest key only makes sense for non-level 0, where files are
+ // non-overlapping
+ *next_smallest = nullptr;
+ }
+
for (size_t i = 0; i < level_files_brief_[level].num_files; ) {
FdWithKeyRange* f = &(level_files_brief_[level].files[i++]);
const Slice file_start = ExtractUserKey(f->smallest_key);
void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
int level, const InternalKey* begin, const InternalKey* end,
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
- bool within_interval) const {
+ bool within_interval, InternalKey** next_smallest) const {
assert(level > 0);
int min = 0;
int mid = 0;
// If there were no overlapping files, return immediately.
if (!foundOverlap) {
+    if (next_smallest) {
+      // BUG FIX: write through the double pointer. The previous
+      // `next_smallest = nullptr;` only nulled the local parameter copy (a
+      // no-op), leaving the caller's pointer aimed at an uninitialized
+      // InternalKey that PickCompactionRange would copy into
+      // **compaction_end. Matches the other clear-sites in this function
+      // and in GetOverlappingInputs, which both use `*next_smallest`.
+      *next_smallest = nullptr;
+    }
return;
}
// returns the index where an overlap is found
for (int i = start_index; i <= end_index; i++) {
inputs->push_back(files_[level][i]);
}
+
+ if (next_smallest != nullptr) {
+ // Provide the next key outside the range covered by inputs
+ if (++end_index < static_cast<int>(files_[level].size())) {
+ **next_smallest = files_[level][end_index]->smallest;
+ } else {
+ *next_smallest = nullptr;
+ }
+ }
}
// Store in *start_index and *end_index the range of all files in
std::vector<FileMetaData*>* inputs,
int hint_index = -1, // index of overlap file
int* file_index = nullptr, // return index of overlap file
- bool expand_range = true) // if set, returns files which overlap the
- const; // range and overlap each other. If false,
+ bool expand_range = true, // if set, returns files which overlap the
+ // range and overlap each other. If false,
// then just files intersecting the range
+ InternalKey** next_smallest = nullptr) // if non-null, returns the
+ const; // smallest key of next file not included
void GetCleanInputsWithinInterval(
int level, const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
const;
void GetOverlappingInputsRangeBinarySearch(
- int level, // level > 0
+ int level, // level > 0
const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
std::vector<FileMetaData*>* inputs,
int hint_index, // index of overlap file
int* file_index, // return index of overlap file
- bool within_interval = false) // if set, force the inputs within interval
- const;
+ bool within_interval = false, // if set, force the inputs within interval
+ InternalKey** next_smallest = nullptr) // if non-null, returns the
+ const; // smallest key of next file not included
void ExtendFileRangeOverlappingInterval(
int level,