* Fast forward option in Trace replay changed to double type to allow replaying at a lower speed, by setting the value between 0 and 1. This option can be set via `ReplayOptions` in `Replayer::Replay()`, or via `--trace_replay_fast_forward` in db_bench.
* Added property `LiveSstFilesSizeAtTemperature` to retrieve the SST file size at different temperatures.
* Added a stat `rocksdb.secondary.cache.hits`.
+* The integrated BlobDB implementation now supports the tickers `BLOB_DB_BLOB_FILE_BYTES_READ`, `BLOB_DB_GC_NUM_KEYS_RELOCATED`, and `BLOB_DB_GC_BYTES_RELOCATED`, as well as the histograms `BLOB_DB_COMPRESSION_MICROS` and `BLOB_DB_DECOMPRESSION_MICROS`.
## Public API change
* Added APIs to decode and replay trace file via Replayer class. Added `DB::NewDefaultReplayer()` to create a default Replayer instance. Added `TraceReader::Reset()` to restart reading a trace file. Created trace_record.h and utilities/replayer.h files to access decoded Trace records and replay them.
assert(blob);
assert(compressed_blob);
assert(compressed_blob->empty());
+ assert(immutable_options_);
if (blob_compression_type_ == kNoCompression) {
return Status::OK();
constexpr uint32_t compression_format_version = 2;
- if (!CompressData(*blob, info, compression_format_version, compressed_blob)) {
+ bool success = false;
+
+ {
+ StopWatch stop_watch(immutable_options_->clock, immutable_options_->stats,
+ BLOB_DB_COMPRESSION_MICROS);
+ success =
+ CompressData(*blob, info, compression_format_version, compressed_blob);
+ }
+
+ if (!success) {
return Status::Corruption("Error compressing blob");
}
#include "db/blob/blob_log_format.h"
#include "file/filename.h"
+#include "monitoring/statistics.h"
#include "options/cf_options.h"
#include "rocksdb/file_system.h"
#include "rocksdb/slice.h"
#include "test_util/sync_point.h"
#include "util/compression.h"
#include "util/crc32c.h"
+#include "util/stop_watch.h"
namespace ROCKSDB_NAMESPACE {
assert(file_reader);
+ Statistics* const statistics = immutable_options.stats;
+
CompressionType compression_type = kNoCompression;
{
- const Status s =
- ReadHeader(file_reader.get(), column_family_id, &compression_type);
+ const Status s = ReadHeader(file_reader.get(), column_family_id, statistics,
+ &compression_type);
if (!s.ok()) {
return s;
}
}
{
- const Status s = ReadFooter(file_size, file_reader.get());
+ const Status s = ReadFooter(file_reader.get(), file_size, statistics);
if (!s.ok()) {
return s;
}
}
blob_file_reader->reset(
- new BlobFileReader(std::move(file_reader), file_size, compression_type));
+ new BlobFileReader(std::move(file_reader), file_size, compression_type,
+ immutable_options.clock, statistics));
return Status::OK();
}
Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
uint32_t column_family_id,
+ Statistics* statistics,
CompressionType* compression_type) {
assert(file_reader);
assert(compression_type);
constexpr uint64_t read_offset = 0;
constexpr size_t read_size = BlobLogHeader::kSize;
- const Status s = ReadFromFile(file_reader, read_offset, read_size,
- &header_slice, &buf, &aligned_buf);
+ const Status s =
+ ReadFromFile(file_reader, read_offset, read_size, statistics,
+ &header_slice, &buf, &aligned_buf);
if (!s.ok()) {
return s;
}
return Status::OK();
}
-Status BlobFileReader::ReadFooter(uint64_t file_size,
- const RandomAccessFileReader* file_reader) {
+Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader,
+ uint64_t file_size, Statistics* statistics) {
assert(file_size >= BlobLogHeader::kSize + BlobLogFooter::kSize);
assert(file_reader);
const uint64_t read_offset = file_size - BlobLogFooter::kSize;
constexpr size_t read_size = BlobLogFooter::kSize;
- const Status s = ReadFromFile(file_reader, read_offset, read_size,
- &footer_slice, &buf, &aligned_buf);
+ const Status s =
+ ReadFromFile(file_reader, read_offset, read_size, statistics,
+ &footer_slice, &buf, &aligned_buf);
if (!s.ok()) {
return s;
}
Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
uint64_t read_offset, size_t read_size,
- Slice* slice, Buffer* buf,
- AlignedBuf* aligned_buf) {
+ Statistics* statistics, Slice* slice,
+ Buffer* buf, AlignedBuf* aligned_buf) {
assert(slice);
assert(buf);
assert(aligned_buf);
assert(file_reader);
+ RecordTick(statistics, BLOB_DB_BLOB_FILE_BYTES_READ, read_size);
+
Status s;
if (file_reader->use_direct_io()) {
BlobFileReader::BlobFileReader(
std::unique_ptr<RandomAccessFileReader>&& file_reader, uint64_t file_size,
- CompressionType compression_type)
+ CompressionType compression_type, SystemClock* clock,
+ Statistics* statistics)
: file_reader_(std::move(file_reader)),
file_size_(file_size),
- compression_type_(compression_type) {
+ compression_type_(compression_type),
+ clock_(clock),
+ statistics_(statistics) {
assert(file_reader_);
}
TEST_SYNC_POINT("BlobFileReader::GetBlob:ReadFromFile");
const Status s = ReadFromFile(file_reader_.get(), record_offset,
- static_cast<size_t>(record_size),
+ static_cast<size_t>(record_size), statistics_,
&record_slice, &buf, &aligned_buf);
if (!s.ok()) {
return s;
const Slice value_slice(record_slice.data() + adjustment, value_size);
{
- const Status s =
- UncompressBlobIfNeeded(value_slice, compression_type, value);
+ const Status s = UncompressBlobIfNeeded(value_slice, compression_type,
+ clock_, statistics_, value);
if (!s.ok()) {
return s;
}
Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
CompressionType compression_type,
+ SystemClock* clock,
+ Statistics* statistics,
PinnableSlice* value) {
assert(value);
constexpr uint32_t compression_format_version = 2;
constexpr MemoryAllocator* allocator = nullptr;
- CacheAllocationPtr output =
- UncompressData(info, value_slice.data(), value_slice.size(),
- &uncompressed_size, compression_format_version, allocator);
+ CacheAllocationPtr output;
+
+ {
+ StopWatch stop_watch(clock, statistics, BLOB_DB_DECOMPRESSION_MICROS);
+ output = UncompressData(info, value_slice.data(), value_slice.size(),
+ &uncompressed_size, compression_format_version,
+ allocator);
+ }
TEST_SYNC_POINT_CALLBACK(
"BlobFileReader::UncompressBlobIfNeeded:TamperWithResult", &output);
struct ReadOptions;
class Slice;
class PinnableSlice;
+class Statistics;
class BlobFileReader {
public:
private:
BlobFileReader(std::unique_ptr<RandomAccessFileReader>&& file_reader,
- uint64_t file_size, CompressionType compression_type);
+ uint64_t file_size, CompressionType compression_type,
+ SystemClock* clock, Statistics* statistics);
static Status OpenFile(const ImmutableOptions& immutable_options,
const FileOptions& file_opts,
std::unique_ptr<RandomAccessFileReader>* file_reader);
static Status ReadHeader(const RandomAccessFileReader* file_reader,
- uint32_t column_family_id,
+ uint32_t column_family_id, Statistics* statistics,
CompressionType* compression_type);
- static Status ReadFooter(uint64_t file_size,
- const RandomAccessFileReader* file_reader);
+ static Status ReadFooter(const RandomAccessFileReader* file_reader,
+ uint64_t file_size, Statistics* statistics);
using Buffer = std::unique_ptr<char[]>;
static Status ReadFromFile(const RandomAccessFileReader* file_reader,
uint64_t read_offset, size_t read_size,
- Slice* slice, Buffer* buf,
+ Statistics* statistics, Slice* slice, Buffer* buf,
AlignedBuf* aligned_buf);
static Status VerifyBlob(const Slice& record_slice, const Slice& user_key,
static Status UncompressBlobIfNeeded(const Slice& value_slice,
CompressionType compression_type,
+ SystemClock* clock,
+ Statistics* statistics,
PinnableSlice* value);
static void SaveValue(const Slice& src, PinnableSlice* dst);
std::unique_ptr<RandomAccessFileReader> file_reader_;
uint64_t file_size_;
CompressionType compression_type_;
+ SystemClock* clock_;
+ Statistics* statistics_;
};
} // namespace ROCKSDB_NAMESPACE
// Blob related statistics
uint64_t num_blobs_read = 0;
uint64_t total_blob_bytes_read = 0;
+ uint64_t num_blobs_relocated = 0;
+ uint64_t total_blob_bytes_relocated = 0;
};
++iter_stats_.num_blobs_read;
iter_stats_.total_blob_bytes_read += bytes_read;
+ ++iter_stats_.num_blobs_relocated;
+ iter_stats_.total_blob_bytes_relocated += blob_index.size();
+
value_ = blob_value_;
if (ExtractLargeValueIfNeededImpl()) {
RecordTick(stats_, FILTER_OPERATION_TOTAL_TIME,
c_iter_stats.total_filter_time);
+
+ if (c_iter_stats.num_blobs_relocated > 0) {
+ RecordTick(stats_, BLOB_DB_GC_NUM_KEYS_RELOCATED,
+ c_iter_stats.num_blobs_relocated);
+ }
+ if (c_iter_stats.total_blob_bytes_relocated > 0) {
+ RecordTick(stats_, BLOB_DB_GC_BYTES_RELOCATED,
+ c_iter_stats.total_blob_bytes_relocated);
+ }
+
RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats);
RecordCompactionIOStats();
NUMBER_ITER_SKIP,
// BlobDB specific stats
- // # of Put/PutTTL/PutUntil to BlobDB.
+ // # of Put/PutTTL/PutUntil to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_PUT,
- // # of Write to BlobDB.
+ // # of Write to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_WRITE,
- // # of Get to BlobDB.
+ // # of Get to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_GET,
- // # of MultiGet to BlobDB.
+ // # of MultiGet to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_MULTIGET,
- // # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator.
+ // # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator. Only
+ // applicable to legacy BlobDB.
BLOB_DB_NUM_SEEK,
- // # of Next to BlobDB iterator.
+ // # of Next to BlobDB iterator. Only applicable to legacy BlobDB.
BLOB_DB_NUM_NEXT,
- // # of Prev to BlobDB iterator.
+ // # of Prev to BlobDB iterator. Only applicable to legacy BlobDB.
BLOB_DB_NUM_PREV,
- // # of keys written to BlobDB.
+ // # of keys written to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_KEYS_WRITTEN,
- // # of keys read from BlobDB.
+ // # of keys read from BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_KEYS_READ,
- // # of bytes (key + value) written to BlobDB.
+ // # of bytes (key + value) written to BlobDB. Only applicable to legacy
+ // BlobDB.
BLOB_DB_BYTES_WRITTEN,
- // # of bytes (keys + value) read from BlobDB.
+ // # of bytes (keys + value) read from BlobDB. Only applicable to legacy
+ // BlobDB.
BLOB_DB_BYTES_READ,
- // # of keys written by BlobDB as non-TTL inlined value.
+ // # of keys written by BlobDB as non-TTL inlined value. Only applicable to
+ // legacy BlobDB.
BLOB_DB_WRITE_INLINED,
- // # of keys written by BlobDB as TTL inlined value.
+ // # of keys written by BlobDB as TTL inlined value. Only applicable to legacy
+ // BlobDB.
BLOB_DB_WRITE_INLINED_TTL,
- // # of keys written by BlobDB as non-TTL blob value.
+ // # of keys written by BlobDB as non-TTL blob value. Only applicable to
+ // legacy BlobDB.
BLOB_DB_WRITE_BLOB,
- // # of keys written by BlobDB as TTL blob value.
+ // # of keys written by BlobDB as TTL blob value. Only applicable to legacy
+ // BlobDB.
BLOB_DB_WRITE_BLOB_TTL,
// # of bytes written to blob file.
BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
// # of times a blob file was synced.
BLOB_DB_BLOB_FILE_SYNCED,
// # of blob index evicted from base DB by BlobDB compaction filter because
- // of expiration.
+ // of expiration. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EXPIRED_COUNT,
// size of blob index evicted from base DB by BlobDB compaction filter
- // because of expiration.
+ // because of expiration. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EXPIRED_SIZE,
// # of blob index evicted from base DB by BlobDB compaction filter because
- // of corresponding file deleted.
+ // of corresponding file deleted. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EVICTED_COUNT,
// size of blob index evicted from base DB by BlobDB compaction filter
- // because of corresponding file deleted.
+ // because of corresponding file deleted. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EVICTED_SIZE,
- // # of blob files that were obsoleted by garbage collection.
+ // # of blob files that were obsoleted by garbage collection. Only applicable
+ // to legacy BlobDB.
BLOB_DB_GC_NUM_FILES,
- // # of blob files generated by garbage collection.
+ // # of blob files generated by garbage collection. Only applicable to legacy
+ // BlobDB.
BLOB_DB_GC_NUM_NEW_FILES,
- // # of BlobDB garbage collection failures.
+ // # of BlobDB garbage collection failures. Only applicable to legacy BlobDB.
BLOB_DB_GC_FAILURES,
// # of keys dropped by BlobDB garbage collection because they had been
// overwritten. DEPRECATED.
BLOB_DB_GC_BYTES_EXPIRED,
// # of bytes relocated to new blob file by garbage collection.
BLOB_DB_GC_BYTES_RELOCATED,
- // # of blob files evicted because of BlobDB is full.
+ // # of blob files evicted because BlobDB is full. Only applicable to
+ // legacy BlobDB.
BLOB_DB_FIFO_NUM_FILES_EVICTED,
- // # of keys in the blob files evicted because of BlobDB is full.
+ // # of keys in the blob files evicted because BlobDB is full. Only
+ // applicable to legacy BlobDB.
BLOB_DB_FIFO_NUM_KEYS_EVICTED,
- // # of bytes in the blob files evicted because of BlobDB is full.
+ // # of bytes in the blob files evicted because BlobDB is full. Only
+ // applicable to legacy BlobDB.
BLOB_DB_FIFO_BYTES_EVICTED,
// These counters indicate a performance issue in WritePrepared transactions.
READ_NUM_MERGE_OPERANDS,
// BlobDB specific stats
- // Size of keys written to BlobDB.
+ // Size of keys written to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_KEY_SIZE,
- // Size of values written to BlobDB.
+ // Size of values written to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_VALUE_SIZE,
- // BlobDB Put/PutWithTTL/PutUntil/Write latency.
+ // BlobDB Put/PutWithTTL/PutUntil/Write latency. Only applicable to legacy
+ // BlobDB.
BLOB_DB_WRITE_MICROS,
- // BlobDB Get latency.
+ // BlobDB Get latency. Only applicable to legacy BlobDB.
BLOB_DB_GET_MICROS,
- // BlobDB MultiGet latency.
+ // BlobDB MultiGet latency. Only applicable to legacy BlobDB.
BLOB_DB_MULTIGET_MICROS,
- // BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency.
+ // BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency. Only applicable to
+ // legacy BlobDB.
BLOB_DB_SEEK_MICROS,
- // BlobDB Next latency.
+ // BlobDB Next latency. Only applicable to legacy BlobDB.
BLOB_DB_NEXT_MICROS,
- // BlobDB Prev latency.
+ // BlobDB Prev latency. Only applicable to legacy BlobDB.
BLOB_DB_PREV_MICROS,
// Blob file write latency.
BLOB_DB_BLOB_FILE_WRITE_MICROS,