git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Add statistics support to integrated BlobDB (#8667)
author Levi Tamasi <ltamasi@fb.com>
Wed, 18 Aug 2021 00:21:16 +0000 (17:21 -0700)
committer Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Wed, 18 Aug 2021 00:22:31 +0000 (17:22 -0700)
Summary:
The patch adds statistics support to the integrated BlobDB implementation,
namely the tickers `BLOB_DB_BLOB_FILE_BYTES_READ` and
`BLOB_DB_GC_{NUM_KEYS,BYTES}_RELOCATED`, and the histograms
`BLOB_DB_(DE)COMPRESSION_MICROS`. (Some other statistics, like
`BLOB_DB_BLOB_FILE_BYTES_WRITTEN`, `BLOB_DB_BLOB_FILE_SYNCED`,
`BLOB_DB_BLOB_FILE_{READ,WRITE,SYNC}_MICROS` were already supported.)
Note that the vast majority of the old BlobDB's tickers/histograms are not
really applicable to the new implementation, since they e.g. pertain to calling
dedicated BlobDB APIs (which the integrated BlobDB does not have) or are
tied to the legacy BlobDB's design of writing blob files synchronously when
a write API is called. Such statistics are marked "legacy BlobDB only" in
`statistics.h`.

Fixes https://github.com/facebook/rocksdb/issues/8645 .

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8667

Test Plan: Ran `make check` and tested the new statistics using `db_bench`.

Reviewed By: riversand963

Differential Revision: D30356884

Pulled By: ltamasi

fbshipit-source-id: 5f8a833faee60401c5643c2f0a6c0415488190a4

HISTORY.md
db/blob/blob_file_builder.cc
db/blob/blob_file_reader.cc
db/blob/blob_file_reader.h
db/compaction/compaction_iteration_stats.h
db/compaction/compaction_iterator.cc
db/compaction/compaction_job.cc
include/rocksdb/statistics.h

index a85ad6335052b02927f3356a51e3a333b7f5de7c..03a36ba7d1a5bf54aa24f13e4540aaa99c6e5ce6 100644 (file)
@@ -18,6 +18,7 @@
 * Fast forward option in Trace replay changed to double type to allow replaying at a lower speed, by settings the value between 0 and 1. This option can be set via `ReplayOptions` in `Replayer::Replay()`, or via `--trace_replay_fast_forward` in db_bench.
 * Add property `LiveSstFilesSizeAtTemperature` to retrieve sst file size at different temperature.
 * Added a stat rocksdb.secondary.cache.hits
+* The integrated BlobDB implementation now supports the tickers `BLOB_DB_BLOB_FILE_BYTES_READ`, `BLOB_DB_GC_NUM_KEYS_RELOCATED`, and `BLOB_DB_GC_BYTES_RELOCATED`, as well as the histograms `BLOB_DB_COMPRESSION_MICROS` and `BLOB_DB_DECOMPRESSION_MICROS`.
 
 ## Public API change
 * Added APIs to decode and replay trace file via Replayer class. Added `DB::NewDefaultReplayer()` to create a default Replayer instance. Added `TraceReader::Reset()` to restart reading a trace file. Created trace_record.h and utilities/replayer.h files to access decoded Trace records and replay them.
index d9809ef7d54c82ee4023449e0f44e42059a6020d..4a3f3d4b02afa42c19f4a36111bdb9350cbf7a3e 100644 (file)
@@ -228,6 +228,7 @@ Status BlobFileBuilder::CompressBlobIfNeeded(
   assert(blob);
   assert(compressed_blob);
   assert(compressed_blob->empty());
+  assert(immutable_options_);
 
   if (blob_compression_type_ == kNoCompression) {
     return Status::OK();
@@ -242,7 +243,16 @@ Status BlobFileBuilder::CompressBlobIfNeeded(
 
   constexpr uint32_t compression_format_version = 2;
 
-  if (!CompressData(*blob, info, compression_format_version, compressed_blob)) {
+  bool success = false;
+
+  {
+    StopWatch stop_watch(immutable_options_->clock, immutable_options_->stats,
+                         BLOB_DB_COMPRESSION_MICROS);
+    success =
+        CompressData(*blob, info, compression_format_version, compressed_blob);
+  }
+
+  if (!success) {
     return Status::Corruption("Error compressing blob");
   }
 
index 1b4b82150da5777a6215434c63e69321c2c07985..6f64da48bacb0acfd82da10a3a8b466f9e8f6de8 100644 (file)
@@ -10,6 +10,7 @@
 
 #include "db/blob/blob_log_format.h"
 #include "file/filename.h"
+#include "monitoring/statistics.h"
 #include "options/cf_options.h"
 #include "rocksdb/file_system.h"
 #include "rocksdb/slice.h"
@@ -17,6 +18,7 @@
 #include "test_util/sync_point.h"
 #include "util/compression.h"
 #include "util/crc32c.h"
+#include "util/stop_watch.h"
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -42,25 +44,28 @@ Status BlobFileReader::Create(
 
   assert(file_reader);
 
+  Statistics* const statistics = immutable_options.stats;
+
   CompressionType compression_type = kNoCompression;
 
   {
-    const Status s =
-        ReadHeader(file_reader.get(), column_family_id, &compression_type);
+    const Status s = ReadHeader(file_reader.get(), column_family_id, statistics,
+                                &compression_type);
     if (!s.ok()) {
       return s;
     }
   }
 
   {
-    const Status s = ReadFooter(file_size, file_reader.get());
+    const Status s = ReadFooter(file_reader.get(), file_size, statistics);
     if (!s.ok()) {
       return s;
     }
   }
 
   blob_file_reader->reset(
-      new BlobFileReader(std::move(file_reader), file_size, compression_type));
+      new BlobFileReader(std::move(file_reader), file_size, compression_type,
+                         immutable_options.clock, statistics));
 
   return Status::OK();
 }
@@ -127,6 +132,7 @@ Status BlobFileReader::OpenFile(
 
 Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
                                   uint32_t column_family_id,
+                                  Statistics* statistics,
                                   CompressionType* compression_type) {
   assert(file_reader);
   assert(compression_type);
@@ -141,8 +147,9 @@ Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
     constexpr uint64_t read_offset = 0;
     constexpr size_t read_size = BlobLogHeader::kSize;
 
-    const Status s = ReadFromFile(file_reader, read_offset, read_size,
-                                  &header_slice, &buf, &aligned_buf);
+    const Status s =
+        ReadFromFile(file_reader, read_offset, read_size, statistics,
+                     &header_slice, &buf, &aligned_buf);
     if (!s.ok()) {
       return s;
     }
@@ -175,8 +182,8 @@ Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
   return Status::OK();
 }
 
-Status BlobFileReader::ReadFooter(uint64_t file_size,
-                                  const RandomAccessFileReader* file_reader) {
+Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader,
+                                  uint64_t file_size, Statistics* statistics) {
   assert(file_size >= BlobLogHeader::kSize + BlobLogFooter::kSize);
   assert(file_reader);
 
@@ -190,8 +197,9 @@ Status BlobFileReader::ReadFooter(uint64_t file_size,
     const uint64_t read_offset = file_size - BlobLogFooter::kSize;
     constexpr size_t read_size = BlobLogFooter::kSize;
 
-    const Status s = ReadFromFile(file_reader, read_offset, read_size,
-                                  &footer_slice, &buf, &aligned_buf);
+    const Status s =
+        ReadFromFile(file_reader, read_offset, read_size, statistics,
+                     &footer_slice, &buf, &aligned_buf);
     if (!s.ok()) {
       return s;
     }
@@ -220,14 +228,16 @@ Status BlobFileReader::ReadFooter(uint64_t file_size,
 
 Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
                                     uint64_t read_offset, size_t read_size,
-                                    Slice* slice, Buffer* buf,
-                                    AlignedBuf* aligned_buf) {
+                                    Statistics* statistics, Slice* slice,
+                                    Buffer* buf, AlignedBuf* aligned_buf) {
   assert(slice);
   assert(buf);
   assert(aligned_buf);
 
   assert(file_reader);
 
+  RecordTick(statistics, BLOB_DB_BLOB_FILE_BYTES_READ, read_size);
+
   Status s;
 
   if (file_reader->use_direct_io()) {
@@ -256,10 +266,13 @@ Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
 
 BlobFileReader::BlobFileReader(
     std::unique_ptr<RandomAccessFileReader>&& file_reader, uint64_t file_size,
-    CompressionType compression_type)
+    CompressionType compression_type, SystemClock* clock,
+    Statistics* statistics)
     : file_reader_(std::move(file_reader)),
       file_size_(file_size),
-      compression_type_(compression_type) {
+      compression_type_(compression_type),
+      clock_(clock),
+      statistics_(statistics) {
   assert(file_reader_);
 }
 
@@ -304,7 +317,7 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
     TEST_SYNC_POINT("BlobFileReader::GetBlob:ReadFromFile");
 
     const Status s = ReadFromFile(file_reader_.get(), record_offset,
-                                  static_cast<size_t>(record_size),
+                                  static_cast<size_t>(record_size), statistics_,
                                   &record_slice, &buf, &aligned_buf);
     if (!s.ok()) {
       return s;
@@ -324,8 +337,8 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
   const Slice value_slice(record_slice.data() + adjustment, value_size);
 
   {
-    const Status s =
-        UncompressBlobIfNeeded(value_slice, compression_type, value);
+    const Status s = UncompressBlobIfNeeded(value_slice, compression_type,
+                                            clock_, statistics_, value);
     if (!s.ok()) {
       return s;
     }
@@ -382,6 +395,8 @@ Status BlobFileReader::VerifyBlob(const Slice& record_slice,
 
 Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
                                               CompressionType compression_type,
+                                              SystemClock* clock,
+                                              Statistics* statistics,
                                               PinnableSlice* value) {
   assert(value);
 
@@ -399,9 +414,14 @@ Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
   constexpr uint32_t compression_format_version = 2;
   constexpr MemoryAllocator* allocator = nullptr;
 
-  CacheAllocationPtr output =
-      UncompressData(info, value_slice.data(), value_slice.size(),
-                     &uncompressed_size, compression_format_version, allocator);
+  CacheAllocationPtr output;
+
+  {
+    StopWatch stop_watch(clock, statistics, BLOB_DB_DECOMPRESSION_MICROS);
+    output = UncompressData(info, value_slice.data(), value_slice.size(),
+                            &uncompressed_size, compression_format_version,
+                            allocator);
+  }
 
   TEST_SYNC_POINT_CALLBACK(
       "BlobFileReader::UncompressBlobIfNeeded:TamperWithResult", &output);
index 9b3f5ebd6248dc6ccca841da19003cab0a368361..3ab0d52c2f256003199a4d3e3622507f2170497a 100644 (file)
@@ -21,6 +21,7 @@ class HistogramImpl;
 struct ReadOptions;
 class Slice;
 class PinnableSlice;
+class Statistics;
 
 class BlobFileReader {
  public:
@@ -44,7 +45,8 @@ class BlobFileReader {
 
  private:
   BlobFileReader(std::unique_ptr<RandomAccessFileReader>&& file_reader,
-                 uint64_t file_size, CompressionType compression_type);
+                 uint64_t file_size, CompressionType compression_type,
+                 SystemClock* clock, Statistics* statistics);
 
   static Status OpenFile(const ImmutableOptions& immutable_options,
                          const FileOptions& file_opts,
@@ -55,17 +57,17 @@ class BlobFileReader {
                          std::unique_ptr<RandomAccessFileReader>* file_reader);
 
   static Status ReadHeader(const RandomAccessFileReader* file_reader,
-                           uint32_t column_family_id,
+                           uint32_t column_family_id, Statistics* statistics,
                            CompressionType* compression_type);
 
-  static Status ReadFooter(uint64_t file_size,
-                           const RandomAccessFileReader* file_reader);
+  static Status ReadFooter(const RandomAccessFileReader* file_reader,
+                           uint64_t file_size, Statistics* statistics);
 
   using Buffer = std::unique_ptr<char[]>;
 
   static Status ReadFromFile(const RandomAccessFileReader* file_reader,
                              uint64_t read_offset, size_t read_size,
-                             Slice* slice, Buffer* buf,
+                             Statistics* statistics, Slice* slice, Buffer* buf,
                              AlignedBuf* aligned_buf);
 
   static Status VerifyBlob(const Slice& record_slice, const Slice& user_key,
@@ -73,6 +75,8 @@ class BlobFileReader {
 
   static Status UncompressBlobIfNeeded(const Slice& value_slice,
                                        CompressionType compression_type,
+                                       SystemClock* clock,
+                                       Statistics* statistics,
                                        PinnableSlice* value);
 
   static void SaveValue(const Slice& src, PinnableSlice* dst);
@@ -80,6 +84,8 @@ class BlobFileReader {
   std::unique_ptr<RandomAccessFileReader> file_reader_;
   uint64_t file_size_;
   CompressionType compression_type_;
+  SystemClock* clock_;
+  Statistics* statistics_;
 };
 
 }  // namespace ROCKSDB_NAMESPACE
index cb7b82c65ab4db7d9be2c049ba57a3e9dc4be175..894aea1d96f9a6fe65d5b349a2f6f3b1ad2f850a 100644 (file)
@@ -38,4 +38,6 @@ struct CompactionIterationStats {
   // Blob related statistics
   uint64_t num_blobs_read = 0;
   uint64_t total_blob_bytes_read = 0;
+  uint64_t num_blobs_relocated = 0;
+  uint64_t total_blob_bytes_relocated = 0;
 };
index 617856e2809bd3193650ef286a3d5a746093c925..c84f03d8aeb43a8f09051f6ee6b6c9dda0af28aa 100644 (file)
@@ -898,6 +898,9 @@ void CompactionIterator::GarbageCollectBlobIfNeeded() {
     ++iter_stats_.num_blobs_read;
     iter_stats_.total_blob_bytes_read += bytes_read;
 
+    ++iter_stats_.num_blobs_relocated;
+    iter_stats_.total_blob_bytes_relocated += blob_index.size();
+
     value_ = blob_value_;
 
     if (ExtractLargeValueIfNeededImpl()) {
index e6ff030fb7de6c7a8c97a970c39cadb60a827eb3..fbbb3f8420bfa3cc5b45bb66b54f4a25ed034805 100644 (file)
@@ -1369,6 +1369,16 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
 
   RecordTick(stats_, FILTER_OPERATION_TOTAL_TIME,
              c_iter_stats.total_filter_time);
+
+  if (c_iter_stats.num_blobs_relocated > 0) {
+    RecordTick(stats_, BLOB_DB_GC_NUM_KEYS_RELOCATED,
+               c_iter_stats.num_blobs_relocated);
+  }
+  if (c_iter_stats.total_blob_bytes_relocated > 0) {
+    RecordTick(stats_, BLOB_DB_GC_BYTES_RELOCATED,
+               c_iter_stats.total_blob_bytes_relocated);
+  }
+
   RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats);
   RecordCompactionIOStats();
 
index fa59397b130ba0006fe8d60f91a3beba36866dea..45da88121de8e38890b0d265fcf2954fdb36dccf 100644 (file)
@@ -247,35 +247,42 @@ enum Tickers : uint32_t {
   NUMBER_ITER_SKIP,
 
   // BlobDB specific stats
-  // # of Put/PutTTL/PutUntil to BlobDB.
+  // # of Put/PutTTL/PutUntil to BlobDB. Only applicable to legacy BlobDB.
   BLOB_DB_NUM_PUT,
-  // # of Write to BlobDB.
+  // # of Write to BlobDB. Only applicable to legacy BlobDB.
   BLOB_DB_NUM_WRITE,
-  // # of Get to BlobDB.
+  // # of Get to BlobDB. Only applicable to legacy BlobDB.
   BLOB_DB_NUM_GET,
-  // # of MultiGet to BlobDB.
+  // # of MultiGet to BlobDB. Only applicable to legacy BlobDB.
   BLOB_DB_NUM_MULTIGET,
-  // # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator.
+  // # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator. Only
+  // applicable to legacy BlobDB.
   BLOB_DB_NUM_SEEK,
-  // # of Next to BlobDB iterator.
+  // # of Next to BlobDB iterator. Only applicable to legacy BlobDB.
   BLOB_DB_NUM_NEXT,
-  // # of Prev to BlobDB iterator.
+  // # of Prev to BlobDB iterator. Only applicable to legacy BlobDB.
   BLOB_DB_NUM_PREV,
-  // # of keys written to BlobDB.
+  // # of keys written to BlobDB. Only applicable to legacy BlobDB.
   BLOB_DB_NUM_KEYS_WRITTEN,
-  // # of keys read from BlobDB.
+  // # of keys read from BlobDB. Only applicable to legacy BlobDB.
   BLOB_DB_NUM_KEYS_READ,
-  // # of bytes (key + value) written to BlobDB.
+  // # of bytes (key + value) written to BlobDB. Only applicable to legacy
+  // BlobDB.
   BLOB_DB_BYTES_WRITTEN,
-  // # of bytes (keys + value) read from BlobDB.
+  // # of bytes (keys + value) read from BlobDB. Only applicable to legacy
+  // BlobDB.
   BLOB_DB_BYTES_READ,
-  // # of keys written by BlobDB as non-TTL inlined value.
+  // # of keys written by BlobDB as non-TTL inlined value. Only applicable to
+  // legacy BlobDB.
   BLOB_DB_WRITE_INLINED,
-  // # of keys written by BlobDB as TTL inlined value.
+  // # of keys written by BlobDB as TTL inlined value. Only applicable to legacy
+  // BlobDB.
   BLOB_DB_WRITE_INLINED_TTL,
-  // # of keys written by BlobDB as non-TTL blob value.
+  // # of keys written by BlobDB as non-TTL blob value. Only applicable to
+  // legacy BlobDB.
   BLOB_DB_WRITE_BLOB,
-  // # of keys written by BlobDB as TTL blob value.
+  // # of keys written by BlobDB as TTL blob value. Only applicable to legacy
+  // BlobDB.
   BLOB_DB_WRITE_BLOB_TTL,
   // # of bytes written to blob file.
   BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
@@ -284,22 +291,24 @@ enum Tickers : uint32_t {
   // # of times a blob files being synced.
   BLOB_DB_BLOB_FILE_SYNCED,
   // # of blob index evicted from base DB by BlobDB compaction filter because
-  // of expiration.
+  // of expiration. Only applicable to legacy BlobDB.
   BLOB_DB_BLOB_INDEX_EXPIRED_COUNT,
   // size of blob index evicted from base DB by BlobDB compaction filter
-  // because of expiration.
+  // because of expiration. Only applicable to legacy BlobDB.
   BLOB_DB_BLOB_INDEX_EXPIRED_SIZE,
   // # of blob index evicted from base DB by BlobDB compaction filter because
-  // of corresponding file deleted.
+  // of corresponding file deleted. Only applicable to legacy BlobDB.
   BLOB_DB_BLOB_INDEX_EVICTED_COUNT,
   // size of blob index evicted from base DB by BlobDB compaction filter
-  // because of corresponding file deleted.
+  // because of corresponding file deleted. Only applicable to legacy BlobDB.
   BLOB_DB_BLOB_INDEX_EVICTED_SIZE,
-  // # of blob files that were obsoleted by garbage collection.
+  // # of blob files that were obsoleted by garbage collection. Only applicable
+  // to legacy BlobDB.
   BLOB_DB_GC_NUM_FILES,
-  // # of blob files generated by garbage collection.
+  // # of blob files generated by garbage collection. Only applicable to legacy
+  // BlobDB.
   BLOB_DB_GC_NUM_NEW_FILES,
-  // # of BlobDB garbage collection failures.
+  // # of BlobDB garbage collection failures. Only applicable to legacy BlobDB.
   BLOB_DB_GC_FAILURES,
   // # of keys dropped by BlobDB garbage collection because they had been
   // overwritten. DEPRECATED.
@@ -317,11 +326,14 @@ enum Tickers : uint32_t {
   BLOB_DB_GC_BYTES_EXPIRED,
   // # of bytes relocated to new blob file by garbage collection.
   BLOB_DB_GC_BYTES_RELOCATED,
-  // # of blob files evicted because of BlobDB is full.
+  // # of blob files evicted because of BlobDB is full. Only applicable to
+  // legacy BlobDB.
   BLOB_DB_FIFO_NUM_FILES_EVICTED,
-  // # of keys in the blob files evicted because of BlobDB is full.
+  // # of keys in the blob files evicted because of BlobDB is full. Only
+  // applicable to legacy BlobDB.
   BLOB_DB_FIFO_NUM_KEYS_EVICTED,
-  // # of bytes in the blob files evicted because of BlobDB is full.
+  // # of bytes in the blob files evicted because of BlobDB is full. Only
+  // applicable to legacy BlobDB.
   BLOB_DB_FIFO_BYTES_EVICTED,
 
   // These counters indicate a performance issue in WritePrepared transactions.
@@ -450,21 +462,23 @@ enum Histograms : uint32_t {
   READ_NUM_MERGE_OPERANDS,
 
   // BlobDB specific stats
-  // Size of keys written to BlobDB.
+  // Size of keys written to BlobDB. Only applicable to legacy BlobDB.
   BLOB_DB_KEY_SIZE,
-  // Size of values written to BlobDB.
+  // Size of values written to BlobDB. Only applicable to legacy BlobDB.
   BLOB_DB_VALUE_SIZE,
-  // BlobDB Put/PutWithTTL/PutUntil/Write latency.
+  // BlobDB Put/PutWithTTL/PutUntil/Write latency. Only applicable to legacy
+  // BlobDB.
   BLOB_DB_WRITE_MICROS,
-  // BlobDB Get latency.
+  // BlobDB Get latency. Only applicable to legacy BlobDB.
   BLOB_DB_GET_MICROS,
-  // BlobDB MultiGet latency.
+  // BlobDB MultiGet latency. Only applicable to legacy BlobDB.
   BLOB_DB_MULTIGET_MICROS,
-  // BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency.
+  // BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency. Only applicable to
+  // legacy BlobDB.
   BLOB_DB_SEEK_MICROS,
-  // BlobDB Next latency.
+  // BlobDB Next latency. Only applicable to legacy BlobDB.
   BLOB_DB_NEXT_MICROS,
-  // BlobDB Prev latency.
+  // BlobDB Prev latency. Only applicable to legacy BlobDB.
   BLOB_DB_PREV_MICROS,
   // Blob file write latency.
   BLOB_DB_BLOB_FILE_WRITE_MICROS,