Summary:
Compaction stats code is not so straightforward to understand. Here's a bit of context for this PR and why this change was made.
- **CompactionStats (compaction_stats_.stats):** Internal stats about the compaction used for logging and public metrics.
- **CompactionJobStats (compaction_job_stats_)**: The public stats at job level. It's part of Compaction event listener and included in the CompactionResult.
- **CompactionOutputsStats**: Output stats only. Resides in CompactionOutputs. It gets aggregated into the CompactionStats (internal stats).
The internal stats, `compaction_stats_.stats`, has the output information recorded from the compaction iterator, but it does not have any input information (input records, input output files) until `UpdateCompactionStats()` gets called. We cannot simply call `UpdateCompactionStats()` to fill in the input information in the remote compaction (which is a subcompaction of the primary host's compaction) because the `compaction->inputs()` have the full list of input files and `UpdateCompactionStats()` takes the entire list of records in all files. `num_input_records` gets double-counted if multiple sub-compactions are submitted to the remote worker.
The job level stats (in the case of remote compaction, these are subcompaction-level stats), `compaction_job_stats_`, have the correct input records, but have no output information. We can use `UpdateCompactionJobStats(compaction_stats_.stats)` to set the output information (num_output_records, num_output_files, etc.) from the `compaction_stats_.stats`, but it also sets all other fields, including the input information, which resets them all back to 0.
Therefore, in the remote worker only, we are overriding `UpdateCompactionJobStats()` to update the job level stats, `compaction_job_stats_`, with the output information of the internal stats.
Basically, we are merging the aggregated output info from the internal stats and the aggregated input info from the compaction job stats.
In this PR we are also fixing how we are setting `is_remote_compaction` in CompactionJobStats.
- On the OnCompactionBegin event, if options.compaction_service is set, `is_remote_compaction=true` for all compactions except for trivial moves
- On the OnCompactionCompleted event, if any of the sub_compactions were done remotely, the compaction-level stats' `is_remote_compaction` will be true
Other minor changes
- num_output_records is already available in CompactionJobStats. No need to store separately in CompactionResult.
- total_bytes is not needed.
- Renamed `SubcompactionState::AggregateCompactionStats()` to `SubcompactionState::AggregateCompactionOutputStats()` to make it clear that it's only aggregating output stats.
- Renamed `SetTotalBytes()` to `AddBytesWritten()` to make it clearer that it's adding total written bytes from the compaction output.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/13071
Test Plan:
Unit Tests added and updated
```
./compaction_service_test
```
Reviewed By: anand1976
Differential Revision:
D64479657
Pulled By: jaykorean
fbshipit-source-id:
a7a776a00dc718abae95d856b661bcbafd3b0ed5
}
}
+ // Before the compaction starts, is_remote_compaction was set to true if
+ // compaction_service is set. We now know whether each sub_compaction was
+ // done remotely or not. Reset is_remote_compaction back to false and allow
+ // AggregateCompactionStats() to set the right value.
+ compaction_job_stats_->is_remote_compaction = false;
+
// Finish up all bookkeeping to unify the subcompaction results.
compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_);
uint64_t num_input_range_del = 0;
}
// fallback to local compaction
assert(comp_status == CompactionServiceJobStatus::kUseLocal);
+ sub_compact->compaction_job_stats.is_remote_compaction = false;
}
uint64_t prev_cpu_micros = db_options_.clock->CPUMicros();
// Returns true iff compaction_stats_.stats.num_input_records and
// num_input_range_del are calculated successfully.
bool UpdateCompactionStats(uint64_t* num_input_range_del = nullptr);
+ virtual void UpdateCompactionJobStats(
+ const InternalStats::CompactionStats& stats) const;
void LogCompaction();
virtual void RecordCompactionIOStats();
void CleanupCompaction();
bool* compaction_released);
Status OpenCompactionOutputFile(SubcompactionState* sub_compact,
CompactionOutputs& outputs);
- void UpdateCompactionJobStats(
- const InternalStats::CompactionStats& stats) const;
+
void RecordDroppedKeys(const CompactionIterationStats& c_iter_stats,
CompactionJobStats* compaction_job_stats = nullptr);
// location of the output files
std::string output_path;
- // some statistics about the compaction
- uint64_t num_output_records = 0;
- uint64_t total_bytes = 0;
uint64_t bytes_read = 0;
uint64_t bytes_written = 0;
CompactionJobStats stats;
protected:
void RecordCompactionIOStats() override;
+ void UpdateCompactionJobStats(
+ const InternalStats::CompactionStats& stats) const override;
+
private:
// Get table file name in output_path
std::string GetTableFileName(uint64_t file_number) override;
}
result.output_level = rnd.Uniform(10);
result.output_path = rnd.RandomString(rnd.Uniform(kStrMaxLen));
- result.num_output_records = rnd64.Uniform(UINT64_MAX);
- result.total_bytes = rnd64.Uniform(UINT64_MAX);
+ result.stats.num_output_records = rnd64.Uniform(UINT64_MAX);
result.bytes_read = 123;
result.bytes_written = rnd64.Uniform(UINT64_MAX);
result.stats.elapsed_micros = rnd64.Uniform(UINT64_MAX);
}
// TODO: Remove it when remote compaction support tiered compaction
- void SetTotalBytes(uint64_t bytes) { stats_.bytes_written += bytes; }
+ void AddBytesWritten(uint64_t bytes) { stats_.bytes_written += bytes; }
void SetNumOutputRecords(uint64_t num) { stats_.num_output_records = num; }
+ void SetNumOutputFiles(uint64_t num) { stats_.num_output_files = num; }
// TODO: Move the BlobDB builder into CompactionOutputs
const std::vector<BlobFileAddition>& GetBlobFileAdditions() const {
}
sub_compact->compaction_job_stats = compaction_result.stats;
sub_compact->Current().SetNumOutputRecords(
- compaction_result.num_output_records);
- sub_compact->Current().SetTotalBytes(compaction_result.total_bytes);
+ compaction_result.stats.num_output_records);
+ sub_compact->Current().SetNumOutputFiles(
+ compaction_result.stats.num_output_files);
+ sub_compact->Current().AddBytesWritten(compaction_result.bytes_written);
RecordTick(stats_, REMOTE_COMPACT_READ_BYTES, compaction_result.bytes_read);
RecordTick(stats_, REMOTE_COMPACT_WRITE_BYTES,
compaction_result.bytes_written);
CompactionJob::RecordCompactionIOStats();
}
+void CompactionServiceCompactionJob::UpdateCompactionJobStats(
+ const InternalStats::CompactionStats& stats) const {
+ compaction_job_stats_->elapsed_micros = stats.micros;
+
+ // Output-side fields only; input-side job stats are not written here
+ compaction_job_stats_->total_output_bytes = stats.bytes_written;
+ compaction_job_stats_->total_output_bytes_blob = stats.bytes_written_blob;
+ compaction_job_stats_->num_output_records = stats.num_output_records;
+ compaction_job_stats_->num_output_files = stats.num_output_files;
+ compaction_job_stats_->num_output_files_blob = stats.num_output_files_blob;
+}
+
CompactionServiceCompactionJob::CompactionServiceCompactionJob(
int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
const MutableDBOptions& mutable_db_options, const FileOptions& file_options,
log_buffer_->FlushBufferToLog();
LogCompaction();
+
+ compaction_result_->stats.Reset();
+
const uint64_t start_micros = db_options_.clock->NowMicros();
c->GetOrInitInputTableProperties();
status = io_s;
}
- // Finish up all book-keeping to unify the subcompaction results
- compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_);
- UpdateCompactionStats();
- RecordCompactionIOStats();
-
LogFlush(db_options_.info_log);
compact_->status = status;
compact_->status.PermitUncheckedError();
- // Build compaction result
+ // Build Compaction Job Stats
+
+ // 1. Aggregate CompactionOutputStats into Internal Compaction Stats
+ // (compaction_stats_) and aggregate Compaction Job Stats
+ // (compaction_job_stats_) from the sub compactions
+ compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_);
+
+ // 2. Update the Output information in the Compaction Job Stats with
+ // aggregated Internal Compaction Stats.
+ UpdateCompactionJobStats(compaction_stats_.stats);
+
+ // 3. Set fields that are not propagated as part of aggregations above
+ compaction_result_->stats.is_manual_compaction = c->is_manual_compaction();
+ compaction_result_->stats.is_full_compaction = c->is_full_compaction();
+ compaction_result_->stats.is_remote_compaction = true;
+
+ // 4. Update IO Stats that are not part of the aggregations above (bytes_read,
+ // bytes_written)
+ RecordCompactionIOStats();
+
+ // Build Output
compaction_result_->output_level = compact_->compaction->output_level();
compaction_result_->output_path = output_path_;
- compaction_result_->stats.is_remote_compaction = true;
for (const auto& output_file : sub_compact->GetOutputs()) {
auto& meta = output_file.meta;
compaction_result_->output_files.emplace_back(
TEST_SYNC_POINT_CALLBACK("CompactionServiceCompactionJob::Run:0",
&compaction_result_);
-
- InternalStats::CompactionStatsFull compaction_stats;
- sub_compact->AggregateCompactionStats(compaction_stats);
- compaction_result_->num_output_records =
- compaction_stats.stats.num_output_records;
- compaction_result_->total_bytes = compaction_stats.TotalBytesWritten();
-
return status;
}
{offsetof(struct CompactionServiceResult, output_path),
OptionType::kEncodedString, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
- {"num_output_records",
- {offsetof(struct CompactionServiceResult, num_output_records),
- OptionType::kUInt64T, OptionVerificationType::kNormal,
- OptionTypeFlags::kNone}},
- {"total_bytes",
- {offsetof(struct CompactionServiceResult, total_bytes),
- OptionType::kUInt64T, OptionVerificationType::kNormal,
- OptionTypeFlags::kNone}},
{"bytes_read",
{offsetof(struct CompactionServiceResult, bytes_read),
OptionType::kUInt64T, OptionVerificationType::kNormal,
} else {
ASSERT_OK(result.status);
}
+ ASSERT_GE(result.stats.elapsed_micros, 1);
+ ASSERT_GE(result.stats.cpu_micros, 1);
+
+ ASSERT_EQ(20, result.stats.num_output_records);
+ ASSERT_EQ(result.output_files.size(), result.stats.num_output_files);
+
+ uint64_t total_size = 0;
+ for (auto output_file : result.output_files) {
+ std::string file_name = result.output_path + "/" + output_file.file_name;
+
+ uint64_t file_size = 0;
+ ASSERT_OK(options.env->GetFileSize(file_name, &file_size));
+ ASSERT_GT(file_size, 0);
+ total_size += file_size;
+ }
+ ASSERT_EQ(total_size, result.stats.total_output_bytes);
+
ASSERT_TRUE(result.stats.is_remote_compaction);
+ ASSERT_TRUE(result.stats.is_manual_compaction);
+ ASSERT_FALSE(result.stats.is_full_compaction);
+
Close();
}
ASSERT_TRUE(result.stats.is_remote_compaction);
}
+class EventVerifier : public EventListener {
+ public:
+ explicit EventVerifier(uint64_t expected_num_input_records,
+ size_t expected_num_input_files,
+ uint64_t expected_num_output_records,
+ size_t expected_num_output_files,
+ const std::string& expected_smallest_output_key_prefix,
+ const std::string& expected_largest_output_key_prefix,
+ bool expected_is_remote_compaction_on_begin,
+ bool expected_is_remote_compaction_on_complete)
+ : expected_num_input_records_(expected_num_input_records),
+ expected_num_input_files_(expected_num_input_files),
+ expected_num_output_records_(expected_num_output_records),
+ expected_num_output_files_(expected_num_output_files),
+ expected_smallest_output_key_prefix_(
+ expected_smallest_output_key_prefix),
+ expected_largest_output_key_prefix_(expected_largest_output_key_prefix),
+ expected_is_remote_compaction_on_begin_(
+ expected_is_remote_compaction_on_begin),
+ expected_is_remote_compaction_on_complete_(
+ expected_is_remote_compaction_on_complete) {}
+ void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& ci) override {
+ ASSERT_EQ(expected_num_input_files_, ci.input_files.size());
+ ASSERT_EQ(expected_num_input_files_, ci.input_file_infos.size());
+ ASSERT_EQ(expected_is_remote_compaction_on_begin_,
+ ci.stats.is_remote_compaction);
+ ASSERT_TRUE(ci.stats.is_manual_compaction);
+ ASSERT_FALSE(ci.stats.is_full_compaction);
+ }
+ void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override {
+ ASSERT_GT(ci.stats.elapsed_micros, 0);
+ ASSERT_GT(ci.stats.cpu_micros, 0);
+ ASSERT_EQ(expected_num_input_records_, ci.stats.num_input_records);
+ ASSERT_EQ(expected_num_input_files_, ci.stats.num_input_files);
+ ASSERT_EQ(expected_num_output_records_, ci.stats.num_output_records);
+ ASSERT_EQ(expected_num_output_files_, ci.stats.num_output_files);
+ ASSERT_EQ(expected_smallest_output_key_prefix_,
+ ci.stats.smallest_output_key_prefix);
+ ASSERT_EQ(expected_largest_output_key_prefix_,
+ ci.stats.largest_output_key_prefix);
+ ASSERT_GT(ci.stats.total_input_bytes, 0);
+ ASSERT_GT(ci.stats.total_output_bytes, 0);
+ ASSERT_EQ(ci.stats.num_input_records,
+ ci.stats.num_output_records + ci.stats.num_records_replaced);
+ ASSERT_EQ(expected_is_remote_compaction_on_complete_,
+ ci.stats.is_remote_compaction);
+ ASSERT_TRUE(ci.stats.is_manual_compaction);
+ ASSERT_FALSE(ci.stats.is_full_compaction);
+ }
+
+ private:
+ uint64_t expected_num_input_records_;
+ size_t expected_num_input_files_;
+ uint64_t expected_num_output_records_;
+ size_t expected_num_output_files_;
+ std::string expected_smallest_output_key_prefix_;
+ std::string expected_largest_output_key_prefix_;
+ bool expected_is_remote_compaction_on_begin_;
+ bool expected_is_remote_compaction_on_complete_;
+};
+
+TEST_F(CompactionServiceTest, VerifyStats) {
+ Options options = CurrentOptions();
+ options.disable_auto_compactions = true;
+ auto event_verifier = std::make_shared<EventVerifier>(
+ 30 /* expected_num_input_records */, 3 /* expected_num_input_files */,
+ 20 /* expected_num_output_records */, 1 /* expected_num_output_files */,
+ "key00000" /* expected_smallest_output_key_prefix */,
+ "key00001" /* expected_largest_output_key_prefix */,
+ true /* expected_is_remote_compaction_on_begin */,
+ true /* expected_is_remote_compaction_on_complete */);
+ options.listeners.push_back(event_verifier);
+ ReopenWithCompactionService(&options);
+ GenerateTestData();
+
+ auto my_cs = GetCompactionService();
+
+ std::string start_str = Key(0);
+ std::string end_str = Key(1);
+ Slice start(start_str);
+ Slice end(end_str);
+ uint64_t comp_num = my_cs->GetCompactionNum();
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end));
+ ASSERT_GE(my_cs->GetCompactionNum(), comp_num + 1);
+ VerifyTestData();
+
+ CompactionServiceResult result;
+ my_cs->GetResult(&result);
+ ASSERT_OK(result.status);
+ ASSERT_TRUE(result.stats.is_manual_compaction);
+ ASSERT_TRUE(result.stats.is_remote_compaction);
+}
+
+TEST_F(CompactionServiceTest, VerifyStatsLocalFallback) {
+ Options options = CurrentOptions();
+ options.disable_auto_compactions = true;
+ auto event_verifier = std::make_shared<EventVerifier>(
+ 30 /* expected_num_input_records */, 3 /* expected_num_input_files */,
+ 20 /* expected_num_output_records */, 1 /* expected_num_output_files */,
+ "key00000" /* expected_smallest_output_key_prefix */,
+ "key00001" /* expected_largest_output_key_prefix */,
+ true /* expected_is_remote_compaction_on_begin */,
+ false /* expected_is_remote_compaction_on_complete */);
+ options.listeners.push_back(event_verifier);
+ ReopenWithCompactionService(&options);
+ GenerateTestData();
+
+ auto my_cs = GetCompactionService();
+ my_cs->OverrideStartStatus(CompactionServiceJobStatus::kUseLocal);
+
+ std::string start_str = Key(0);
+ std::string end_str = Key(1);
+ Slice start(start_str);
+ Slice end(end_str);
+ uint64_t comp_num = my_cs->GetCompactionNum();
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end));
+ // Remote Compaction did not happen
+ ASSERT_EQ(my_cs->GetCompactionNum(), comp_num);
+ VerifyTestData();
+}
+
TEST_F(CompactionServiceTest, CorruptedOutput) {
Options options = CurrentOptions();
options.disable_auto_compactions = true;
InternalStats::CompactionStatsFull& compaction_stats,
CompactionJobStats& compaction_job_stats) {
for (const auto& sc : sub_compact_states) {
- sc.AggregateCompactionStats(compaction_stats);
+ sc.AggregateCompactionOutputStats(compaction_stats);
compaction_job_stats.Add(sc.compaction_job_stats);
}
}
#include "rocksdb/sst_partitioner.h"
namespace ROCKSDB_NAMESPACE {
-void SubcompactionState::AggregateCompactionStats(
+void SubcompactionState::AggregateCompactionOutputStats(
InternalStats::CompactionStatsFull& compaction_stats) const {
compaction_stats.stats.Add(compaction_outputs_.stats_);
if (HasPenultimateLevelOutputs()) {
void Cleanup(Cache* cache);
- void AggregateCompactionStats(
+ void AggregateCompactionOutputStats(
InternalStats::CompactionStatsFull& compaction_stats) const;
CompactionOutputs& Current() const {
is_manual && manual_compaction->disallow_trivial_move;
CompactionJobStats compaction_job_stats;
+ // Set is_remote_compaction to true on CompactionBegin Event if
+ // compaction_service is set except for trivial moves. We do not know whether
+ // remote compaction will actually be successfully scheduled, or fall back to
+ // local at this time. CompactionCompleted event will tell the truth where
+ // the compaction actually happened.
+ compaction_job_stats.is_remote_compaction =
+ immutable_db_options().compaction_service != nullptr;
+
Status status;
if (!error_handler_.IsBGWorkStopped()) {
if (shutting_down_.load(std::memory_order_acquire)) {
ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION);
compaction_job_stats.num_input_files = c->num_input_files(0);
+ // Trivial moves do not get compacted remotely
+ compaction_job_stats.is_remote_compaction = false;
NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
compaction_job_stats, job_context->job_id);
ASSERT_EQ(largest.user_key().ToString(), "foo");
ASSERT_EQ(result.output_level, 1);
ASSERT_EQ(result.output_path, this->secondary_path_);
- ASSERT_EQ(result.num_output_records, 2);
+ ASSERT_EQ(result.stats.num_output_records, 2);
ASSERT_GT(result.bytes_written, 0);
ASSERT_OK(result.status);
}
num_single_del_fallthru += stats.num_single_del_fallthru;
num_single_del_mismatch += stats.num_single_del_mismatch;
+
+ is_remote_compaction |= stats.is_remote_compaction;
}