From: Ranjan Banerjee Date: Tue, 11 Nov 2025 01:13:34 +0000 (-0800) Subject: Api to get SST file with key ranges for a particular level and key range (startKey... X-Git-Tag: v10.8.3~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3a196076d1ac06506434b01943a992b95e042e97;p=rocksdb.git Api to get SST file with key ranges for a particular level and key range (startKey, EndKey) rocksdb [Internal version] (#14009) Summary: There are instances where an application might be interested in knowing the distribution of a key range across the SST files in a particular level. This implementation adds an overloaded GetColumnFamilyMetaData API to which a key range (startKey, EndKey) can be passed along with level information, to filter the relevant SST files along with the key range of each SST file. Pull Request resolved: https://github.com/facebook/rocksdb/pull/14009 Reviewed By: anand1976 Differential Revision: D83389707 fbshipit-source-id: 6df1dc1f9233efe9000b03cc1831b3c618cbcef3 --- diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 1ece5b066..96d4c64fc 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -5047,6 +5047,19 @@ void DBImpl::GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, } } +void DBImpl::GetColumnFamilyMetaData( + ColumnFamilyHandle* column_family, + const GetColumnFamilyMetaDataOptions& options, + ColumnFamilyMetaData* metadata) { + assert(column_family); + auto* cfd = + static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd(); + { + InstrumentedMutexLock l(&mutex_); + cfd->current()->GetColumnFamilyMetaData(options, metadata); + } +} + void DBImpl::GetAllColumnFamilyMetaData( std::vector<ColumnFamilyMetaData>* metadata) { InstrumentedMutexLock l(&mutex_); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index c3d045725..9168c94f6 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -573,6 +573,11 @@ class DBImpl : public DB { void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, ColumnFamilyMetaData* metadata) 
override; + // Get column family metadata with filtering based on key range and level + void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, + const GetColumnFamilyMetaDataOptions& options, + ColumnFamilyMetaData* metadata) override; + void GetAllColumnFamilyMetaData( std::vector<ColumnFamilyMetaData>* metadata) override; diff --git a/db/db_test.cc b/db/db_test.cc index 1919be904..9474f6843 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1492,6 +1492,246 @@ TEST_F(DBTest, MetaDataTest) { CheckLiveFilesMeta(live_file_meta, files_by_level); } +TEST_F(DBTest, GetColumnFamilyMetaDataWithKeyRangeAndLevel) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.disable_auto_compactions = true; + + int64_t temp_time = 0; + ASSERT_OK(options.env->GetCurrentTime(&temp_time)); + + DestroyAndReopen(options); + + Random rnd(301); + int key_index = 0; + for (int i = 0; i < 100; ++i) { + // Add a single blob reference to each file + std::string blob_index; + BlobIndex::EncodeBlob(&blob_index, /* blob_file_number */ i + 1000, + /* offset */ 1234, /* size */ 5678, kNoCompression); + + WriteBatch batch; + ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, Key(key_index), + blob_index)); + ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); + + ++key_index; + + // Fill up the rest of the file with random values. 
+ GenerateNewFile(&rnd, &key_index, /* nowait */ true); + + ASSERT_OK(Flush()); + } + + std::vector<std::vector<FileMetaData>> files_by_level; + dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files_by_level); + + ASSERT_OK(options.env->GetCurrentTime(&temp_time)); + + ColumnFamilyMetaData cf_meta; + // Keys in the SST files are distributed + // (key000000, key000100) -> File 1 + // (key000101, key000201) -> File 2 + // (key000202, key000302) -> File 3 + // (key009999, key010099) -> File 100 + + // With keySlice (key000050, key000150) => should only pick 2 files (instead of + // default 100 that is in the level) + auto startKey = Slice("key000050"); + auto endKey = Slice("key000150"); + GetColumnFamilyMetaDataOptions cf_options(startKey, endKey, 0); + db_->GetColumnFamilyMetaData(cf_options, &cf_meta); + ASSERT_EQ(cf_meta.levels.size(), 1); + const auto& level_meta_from_cf = cf_meta.levels[0]; + ASSERT_EQ(level_meta_from_cf.files.size(), 2); + ASSERT_LT(level_meta_from_cf.files[1].smallestkey, + std::string(startKey.data())); + ASSERT_GT(level_meta_from_cf.files[0].largestkey, std::string(endKey.data())); + + GetColumnFamilyMetaDataOptions cf_option_default; + db_->GetColumnFamilyMetaData(cf_option_default, &cf_meta); + ASSERT_EQ(cf_meta.levels.size(), 1); + ASSERT_EQ(cf_meta.levels[0].files.size(), 100); + + // Test with start key valid and end key unbounded + // This should get all files from key000150 onwards (99 files) + auto startKeyUnbounded = Slice("key000150"); + GetColumnFamilyMetaDataOptions cf_options_unbounded_end(startKeyUnbounded, + OptSlice(), 0); + db_->GetColumnFamilyMetaData(cf_options_unbounded_end, &cf_meta); + ASSERT_EQ(cf_meta.levels.size(), 1); + ASSERT_EQ(cf_meta.levels[0].files.size(), 99); + + // Test with end key valid and start key unbounded + // This should get all files from beginning to key000250 (3 files) + auto endKeyUnbounded = Slice("key000250"); + GetColumnFamilyMetaDataOptions cf_options_unbounded_start(OptSlice(), + endKeyUnbounded, 0); + 
db_->GetColumnFamilyMetaData(cf_options_unbounded_start, &cf_meta); + ASSERT_EQ(cf_meta.levels.size(), 1); + ASSERT_EQ(cf_meta.levels[0].files.size(), 3); +} + +TEST_F(DBTest, GetColumnFamilyMetaDataBottommostLevel) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.disable_auto_compactions = true; + options.num_levels = 7; + + DestroyAndReopen(options); + + Random rnd(301); + int key_index = 0; + + for (int i = 0; i < 100; ++i) { + GenerateNewFile(&rnd, &key_index, /* nowait */ true); + ASSERT_OK(Flush()); + } + + CompactRangeOptions compact_options; + compact_options.bottommost_level_compaction = + BottommostLevelCompaction::kForce; + compact_options.change_level = true; + compact_options.target_level = 6; + ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); + + // Nothing on Level 0 after compaction + ColumnFamilyMetaData cf_meta; + GetColumnFamilyMetaDataOptions cf_options_0(OptSlice(), OptSlice(), 0); + db_->GetColumnFamilyMetaData(cf_options_0, &cf_meta); + + ASSERT_EQ(cf_meta.levels.size(), 0); + ASSERT_EQ(cf_meta.file_count, 0); + + // Data should be in Level 6 + GetColumnFamilyMetaDataOptions cf_options(OptSlice(), OptSlice(), 6); + db_->GetColumnFamilyMetaData(cf_options, &cf_meta); + + ASSERT_EQ(cf_meta.levels.size(), 1); + ASSERT_EQ(cf_meta.levels[0].level, 6); + ASSERT_GT(cf_meta.levels[0].files.size(), 0); + size_t all_files = cf_meta.levels[0].files.size(); + + // Keys in the SST files are distributed across level 6 + // Test with key range - should only return files within the range + auto startKey = Slice("key000050"); + auto endKey = Slice("key000150"); + GetColumnFamilyMetaDataOptions cf_options_range(startKey, endKey, 6); + db_->GetColumnFamilyMetaData(cf_options_range, &cf_meta); + + ASSERT_EQ(cf_meta.levels.size(), 1); + ASSERT_EQ(cf_meta.levels[0].level, 6); + ASSERT_GT(cf_meta.levels[0].files.size(), 0); + size_t files_in_range = cf_meta.levels[0].files.size(); + + // Files in range should be 
less than or equal to all files + ASSERT_LE(files_in_range, all_files); +} + +TEST_F(DBTest, GetColumnFamilyMetaDataMultipleLevels) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.disable_auto_compactions = true; + options.num_levels = 7; + + DestroyAndReopen(options); + + Random rnd(301); + int key_index = 0; + + for (int i = 0; i < 50; ++i) { + GenerateNewFile(&rnd, &key_index, /* nowait */ true); + ASSERT_OK(Flush()); + } + + CompactRangeOptions compact_options; + compact_options.bottommost_level_compaction = + BottommostLevelCompaction::kForce; + compact_options.change_level = true; + compact_options.target_level = 6; + ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); + + for (int i = 0; i < 30; ++i) { + GenerateNewFile(&rnd, &key_index, /* nowait */ true); + ASSERT_OK(Flush()); + } + + // First verify both levels have files without key range filter + ColumnFamilyMetaData cf_meta_all_no_range; + GetColumnFamilyMetaDataOptions cf_options_all_no_range; + db_->GetColumnFamilyMetaData(cf_options_all_no_range, &cf_meta_all_no_range); + + bool has_level_0 = false; + bool has_level_6 = false; + for (const auto& level : cf_meta_all_no_range.levels) { + if (level.level == 0 && level.files.size() > 0) { + has_level_0 = true; + } + if (level.level == 6 && level.files.size() > 0) { + has_level_6 = true; + } + } + + ASSERT_TRUE(has_level_0); + ASSERT_TRUE(has_level_6); + + // Test querying bottommost level only with key range + // Use a range that should be in the first set of files (now in level 6) + auto startKey = Slice("key000050"); + auto endKey = Slice("key000150"); + ColumnFamilyMetaData cf_meta_bottommost; + GetColumnFamilyMetaDataOptions cf_options_bottommost(startKey, endKey, 6); + db_->GetColumnFamilyMetaData(cf_options_bottommost, &cf_meta_bottommost); + + ASSERT_EQ(cf_meta_bottommost.levels.size(), 1); + ASSERT_EQ(cf_meta_bottommost.levels[0].level, 6); + ASSERT_GT(cf_meta_bottommost.levels[0].files.size(), 
0); + size_t level_6_files_in_range = cf_meta_bottommost.levels[0].files.size(); + + // Test querying all levels with same key range + ColumnFamilyMetaData cf_meta_all; + GetColumnFamilyMetaDataOptions cf_options_all(startKey, endKey); + db_->GetColumnFamilyMetaData(cf_options_all, &cf_meta_all); + + size_t level_6_files_in_range_from_all = 0; + for (const auto& level : cf_meta_all.levels) { + if (level.level == 6) { + level_6_files_in_range_from_all = level.files.size(); + } + } + + ASSERT_GT(level_6_files_in_range_from_all, 0); + ASSERT_EQ(level_6_files_in_range, level_6_files_in_range_from_all); +} + +TEST_F(DBTest, GetColumnFamilyMetaDataEmptyDB) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.num_levels = 7; + + DestroyAndReopen(options); + + // Test on empty database + ColumnFamilyMetaData cf_meta_empty_db; + GetColumnFamilyMetaDataOptions cf_options_empty_db; + db_->GetColumnFamilyMetaData(cf_options_empty_db, &cf_meta_empty_db); + + ASSERT_EQ(cf_meta_empty_db.levels.size(), 0); + ASSERT_EQ(cf_meta_empty_db.file_count, 0); + ASSERT_EQ(cf_meta_empty_db.size, 0); + + // Test on empty database with key range + auto startKey = Slice("key000050"); + auto endKey = Slice("key000150"); + ColumnFamilyMetaData cf_meta_empty_range; + GetColumnFamilyMetaDataOptions cf_options_empty_range(startKey, endKey); + db_->GetColumnFamilyMetaData(cf_options_empty_range, &cf_meta_empty_range); + + ASSERT_EQ(cf_meta_empty_range.levels.size(), 0); + ASSERT_EQ(cf_meta_empty_range.file_count, 0); + ASSERT_EQ(cf_meta_empty_range.size, 0); +} + TEST_F(DBTest, AllMetaDataTest) { Options options = CurrentOptions(); options.create_if_missing = true; @@ -3535,6 +3775,11 @@ class ModelDB : public DB { void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, ColumnFamilyMetaData* /*metadata*/) override {} + void GetColumnFamilyMetaData( + ColumnFamilyHandle* /*column_family*/, + const GetColumnFamilyMetaDataOptions& /*options*/, + 
+ ColumnFamilyMetaData* /*metadata*/) override {} + Status GetDbIdentity(std::string& /*identity*/) const override { return Status::OK(); } diff --git a/db/version_set.cc b/db/version_set.cc index ff3d89f58..1569ff24e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2024,6 +2024,79 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) { } } +void Version::GetColumnFamilyMetaData( + const GetColumnFamilyMetaDataOptions& options, + ColumnFamilyMetaData* cf_meta) { + assert(cf_meta); + assert(cfd_); + + cf_meta->name = cfd_->GetName(); + cf_meta->size = 0; + cf_meta->file_count = 0; + cf_meta->levels.clear(); + cf_meta->blob_file_size = 0; + cf_meta->blob_file_count = 0; + cf_meta->blob_files.clear(); + + const auto& ioptions = cfd_->ioptions(); + auto* vstorage = storage_info(); + + int first_level = (options.level >= 0) ? options.level : 0; + int last_level = + (options.level >= 0) ? options.level + 1 : cfd_->NumberLevels(); + + InternalKey ikey_start, ikey_end; + const InternalKey* begin = nullptr; + const InternalKey* end = nullptr; + + if (options.range.start.has_value()) { + ikey_start = InternalKey(options.range.start.value(), kMaxSequenceNumber, + kValueTypeForSeek); + begin = &ikey_start; + } + + if (options.range.limit.has_value()) { + ikey_end = InternalKey(options.range.limit.value(), kMaxSequenceNumber, + kValueTypeForSeek); + end = &ikey_end; + } + + for (int l = first_level; l < last_level; ++l) { + uint64_t level_size = 0; + std::vector<SstFileMetaData> files; + std::vector<FileMetaData*> overlapping_files; + vstorage->GetOverlappingInputs(l, begin, end, &overlapping_files); + + for (const auto& file : overlapping_files) { + uint32_t path_id = file->fd.GetPathId(); + const auto& file_path = (path_id < ioptions.cf_paths.size()) + ? 
ioptions.cf_paths[path_id].path + : ioptions.cf_paths.back().path; + const uint64_t file_number = file->fd.GetNumber(); + files.emplace_back( + MakeTableFileName("", file_number), file_number, file_path, + file->fd.GetFileSize(), file->fd.smallest_seqno, + file->fd.largest_seqno, file->smallest.user_key().ToString(), + file->largest.user_key().ToString(), + file->stats.num_reads_sampled.load(std::memory_order_relaxed), + file->being_compacted, file->temperature, + file->oldest_blob_file_number, file->TryGetOldestAncesterTime(), + file->TryGetFileCreationTime(), file->epoch_number, + file->file_checksum, file->file_checksum_func_name); + files.back().num_entries = file->num_entries; + files.back().num_deletions = file->num_deletions; + files.back().smallest = file->smallest.Encode().ToString(); + files.back().largest = file->largest.Encode().ToString(); + level_size += file->fd.GetFileSize(); + cf_meta->file_count++; + } + if (!files.empty()) { + cf_meta->levels.emplace_back(l, level_size, std::move(files)); + cf_meta->size += level_size; + } + } +} + uint64_t Version::GetSstFilesSize() { uint64_t sst_files_size = 0; for (int level = 0; level < storage_info_.num_levels_; level++) { diff --git a/db/version_set.h b/db/version_set.h index 175cebd65..d08aecf77 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1044,6 +1044,10 @@ class Version { void GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta); + // Get column family metadata with optional filtering by key range and level. 
+ void GetColumnFamilyMetaData(const GetColumnFamilyMetaDataOptions& options, + ColumnFamilyMetaData* cf_meta); + void GetSstFilesBoundaryKeys(Slice* smallest_user_key, Slice* largest_user_key); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index ff6218879..f996addf2 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1935,11 +1935,24 @@ class DB { virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, ColumnFamilyMetaData* /*metadata*/) {} + // Obtains the LSM-tree meta data of the specified column family of the DB + // with optional filtering by key range and level. + virtual void GetColumnFamilyMetaData( + ColumnFamilyHandle* /*column_family*/, + const GetColumnFamilyMetaDataOptions& /*options*/, + ColumnFamilyMetaData* /*metadata*/) {} + // Get the metadata of the default column family. void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) { GetColumnFamilyMetaData(DefaultColumnFamily(), metadata); } + // Get the metadata of the default column family with optional filtering. + void GetColumnFamilyMetaData(const GetColumnFamilyMetaDataOptions& options, + ColumnFamilyMetaData* metadata) { + GetColumnFamilyMetaData(DefaultColumnFamily(), options, metadata); + } + // Obtains the LSM-tree meta data of all column families of the DB, including // metadata for each live table (SST) file and each blob file in the DB. virtual void GetAllColumnFamilyMetaData( diff --git a/include/rocksdb/metadata.h b/include/rocksdb/metadata.h index 4c6c79f4c..29e6b6dc5 100644 --- a/include/rocksdb/metadata.h +++ b/include/rocksdb/metadata.h @@ -224,6 +224,20 @@ struct LevelMetaData { const std::vector<SstFileMetaData> files; }; +// Options for filtering column family metadata by key range and level. +struct GetColumnFamilyMetaDataOptions { + RangeOpt range; + + // The level to filter on. If -1, all levels are included. 
+ int level = -1; + + GetColumnFamilyMetaDataOptions() = default; + + GetColumnFamilyMetaDataOptions(const OptSlice& _start_key, + const OptSlice& _end_key, int _level = -1) + : range(_start_key, _end_key), level(_level) {} +}; + // The metadata that describes a column family. struct ColumnFamilyMetaData { ColumnFamilyMetaData() : size(0), file_count(0), name("") {} diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index 0710c713d..c84dc06b8 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -456,6 +456,12 @@ class StackableDB : public DB { db_->GetColumnFamilyMetaData(column_family, cf_meta); } + void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, + const GetColumnFamilyMetaDataOptions& options, + ColumnFamilyMetaData* metadata) override { + db_->GetColumnFamilyMetaData(column_family, options, metadata); + } + using DB::StartBlockCacheTrace; Status StartBlockCacheTrace( const TraceOptions& trace_options,