git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Add blob cache option in the column family options (#10155)
author: Gang Liao <gangliao@fb.com>
Tue, 14 Jun 2022 21:19:26 +0000 (14:19 -0700)
committer: Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Tue, 14 Jun 2022 21:19:26 +0000 (14:19 -0700)
Summary:
There is currently no caching mechanism for blobs, which is not ideal, especially when the database resides on remote storage (where we cannot rely on the OS page cache). As part of this task, we would like to make it possible for the application to configure a blob cache.
This PR is a part of https://github.com/facebook/rocksdb/issues/10156

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10155

Reviewed By: ltamasi

Differential Revision: D37150819

Pulled By: gangliao

fbshipit-source-id: b807c7916ea5d411588128f8e22a49f171388fe2

13 files changed:
db/c.cc
db/db_options_test.cc
include/rocksdb/advanced_options.h
include/rocksdb/c.h
include/rocksdb/file_system.h
include/rocksdb/options.h
options/cf_options.cc
options/cf_options.h
options/options.cc
options/options_helper.cc
options/options_settable_test.cc
options/options_test.cc
table/block_based/block_based_table_reader.cc

diff --git a/db/c.cc b/db/c.cc
index d745d5aee61f11a389a8aa064d4c60b19788bfe5..d1d0aafe1ebc1e71e8409aa818105c04cc12f9f7 100644 (file)
--- a/db/c.cc
+++ b/db/c.cc
@@ -3048,6 +3048,11 @@ int rocksdb_options_get_blob_file_starting_level(rocksdb_options_t* opt) {
   return opt->rep.blob_file_starting_level;
 }
 
+void rocksdb_options_set_blob_cache(rocksdb_options_t* opt,
+                                    rocksdb_cache_t* blob_cache) {
+  opt->rep.blob_cache = blob_cache->rep;
+}
+
 void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
   opt->rep.num_levels = n;
 }
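diff --git a/db/db_options_test.cc b/db/db_options_test.cc
index 46aa252112cbeba938009f4fa08f99d1fe3ad6ed..229ad904ec45d0cabf7de8741a2d8b68cb1aa2f2 100644 (file)
--- a/db/db_options_test.cc
+++ b/db/db_options_test.cc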
@@ -220,6 +220,7 @@ TEST_F(DBOptionsTest, SetMutableTableOptions) {
 
   ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily();
   Options c_opts = dbfull()->GetOptions(cfh);
+
   const auto* c_bbto =
       c_opts.table_factory->GetOptions<BlockBasedTableOptions>();
   ASSERT_NE(c_bbto, nullptr);
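diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h
index e3f4ccee72a26ffc60b6eb8581633d9d6708f409..9cdd947bed7d1ab25fad459465d65a113f7169b8 100644 (file)
--- a/include/rocksdb/advanced_options.h
+++ b/include/rocksdb/advanced_options.h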
@@ -10,6 +10,7 @@
 
 #include <memory>
 
+#include "rocksdb/cache.h"
 #include "rocksdb/compression_type.h"
 #include "rocksdb/memtablerep.h"
 #include "rocksdb/universal_compaction.h"
@@ -227,7 +228,7 @@ enum class Temperature : uint8_t {
 };
 
 // The control option of how the cache tiers will be used. Currently rocksdb
-// support block cahe (volatile tier), secondary cache (non-volatile tier).
+// support block cache (volatile tier), secondary cache (non-volatile tier).
 // In the future, we may add more caching layers.
 enum class CacheTier : uint8_t {
   kVolatileTier = 0,
@@ -953,6 +954,13 @@ struct AdvancedColumnFamilyOptions {
   // Dynamically changeable through the SetOptions() API
   int blob_file_starting_level = 0;
 
+  // This feature is WORK IN PROGRESS
+  // If non-NULL use the specified cache for blobs.
+  // If NULL, rocksdb will not use a blob cache.
+  //
+  // Default: nullptr (disabled)
+  std::shared_ptr<Cache> blob_cache = nullptr;
+
   // Create ColumnFamilyOptions with default values for all fields
   AdvancedColumnFamilyOptions();
   // Create ColumnFamilyOptions from Options
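diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h
index 5563335512183f27dede76c4121b07e32ea3be7a..93737c4bdb965a91e2aa6e027c5f871c6adbb0e5 100644 (file)
--- a/include/rocksdb/c.h
+++ b/include/rocksdb/c.h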
@@ -1264,6 +1264,9 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_file_starting_level(
 extern ROCKSDB_LIBRARY_API int rocksdb_options_get_blob_file_starting_level(
     rocksdb_options_t* opt);
 
+extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_cache(
+    rocksdb_options_t* opt, rocksdb_cache_t* blob_cache);
+
 /* returns a pointer to a malloc()-ed, null terminated string */
 extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string(
     rocksdb_options_t* opt);
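diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h
index 7bc19976b7e9036b454b813dfa9aa678640a1d95..19f4a402aee9e54acd573d857404a33415868ebc 100644 (file)
--- a/include/rocksdb/file_system.h
+++ b/include/rocksdb/file_system.h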
@@ -762,7 +762,7 @@ struct FSReadRequest {
   // returns fewer bytes if end of file is hit (or `status` is not OK).
   size_t len;
 
-  // A buffer that MultiRead()  can optionally place data in. It can
+  // A buffer that MultiRead() can optionally place data in. It can
   // ignore this and allocate its own buffer.
   // The lifecycle of scratch will be until IO is completed.
   //
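diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h
index cc175dccc6ee6b50319fe954347301d1d31e42fc..19bc3cb190a5c2e5cde69e7ef8ea3b5c8b663549 100644 (file)
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h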
@@ -1412,7 +1412,6 @@ struct Options : public DBOptions, public ColumnFamilyOptions {
   Options* DisableExtraChecks();
 };
 
-//
 // An application can issue a read request (via Get/Iterators) and specify
 // if that read should process data that ALREADY resides on a specified cache
 // level. For example, if an application specifies kBlockCacheTier then the
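diff --git a/options/cf_options.cc b/options/cf_options.cc
index 2da55a6fe64270f26286084a30b6f648e0a874e6..1e986526574d2f1d522039c46e9d5cc7464abdcf 100644 (file)
--- a/options/cf_options.cc
+++ b/options/cf_options.cc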
@@ -732,6 +732,16 @@ static std::unordered_map<std::string, OptionTypeInfo>
          OptionTypeInfo::AsCustomSharedPtr<SstPartitionerFactory>(
              offsetof(struct ImmutableCFOptions, sst_partitioner_factory),
              OptionVerificationType::kByName, OptionTypeFlags::kAllowNull)},
+        {"blob_cache",
+         {offsetof(struct ImmutableCFOptions, blob_cache), OptionType::kUnknown,
+          OptionVerificationType::kNormal,
+          (OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize),
+          // Parses the input value as a Cache
+          [](const ConfigOptions& opts, const std::string&,
+             const std::string& value, void* addr) {
+            auto* cache = static_cast<std::shared_ptr<Cache>*>(addr);
+            return Cache::CreateFromString(opts, value, cache);
+          }}},
 };
 
 const std::string OptionsHelper::kCFOptionsName = "ColumnFamilyOptions";
@@ -870,7 +880,8 @@ ImmutableCFOptions::ImmutableCFOptions(const ColumnFamilyOptions& cf_options)
           cf_options.memtable_insert_with_hint_prefix_extractor),
       cf_paths(cf_options.cf_paths),
       compaction_thread_limiter(cf_options.compaction_thread_limiter),
-      sst_partitioner_factory(cf_options.sst_partitioner_factory) {}
+      sst_partitioner_factory(cf_options.sst_partitioner_factory),
+      blob_cache(cf_options.blob_cache) {}
 
 ImmutableOptions::ImmutableOptions() : ImmutableOptions(Options()) {}
 
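diff --git a/options/cf_options.h b/options/cf_options.h
index c6bfe8f78e2f78901fab2cfbdef8b7b7f0d45492..bfdc2e102ac35af5fabe6aa7649014c92da68af7 100644 (file)
--- a/options/cf_options.h
+++ b/options/cf_options.h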
@@ -78,6 +78,8 @@ struct ImmutableCFOptions {
   std::shared_ptr<ConcurrentTaskLimiter> compaction_thread_limiter;
 
   std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory;
+
+  std::shared_ptr<Cache> blob_cache;
 };
 
 struct ImmutableOptions : public ImmutableDBOptions, public ImmutableCFOptions {
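diff --git a/options/options.cc b/options/options.cc
index 8424549b7516a5ba3f1cd7b1fc4ed06524f40749..bba166be49f73c4f0b849359c7aae48cecdf0065 100644 (file)
--- a/options/options.cc
+++ b/options/options.cc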
@@ -101,7 +101,8 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
       blob_garbage_collection_force_threshold(
           options.blob_garbage_collection_force_threshold),
       blob_compaction_readahead_size(options.blob_compaction_readahead_size),
-      blob_file_starting_level(options.blob_file_starting_level) {
+      blob_file_starting_level(options.blob_file_starting_level),
+      blob_cache(options.blob_cache) {
   assert(memtable_factory.get() != nullptr);
   if (max_bytes_for_level_multiplier_additional.size() <
       static_cast<unsigned int>(num_levels)) {
@@ -417,6 +418,12 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
         blob_compaction_readahead_size);
     ROCKS_LOG_HEADER(log, "               Options.blob_file_starting_level: %d",
                      blob_file_starting_level);
+    if (blob_cache) {
+      ROCKS_LOG_HEADER(log, "                          Options.blob_cache: %s",
+                       blob_cache->Name());
+      ROCKS_LOG_HEADER(log, "                          blob_cache options: %s",
+                       blob_cache->GetPrintableOptions().c_str());
+    }
 }  // ColumnFamilyOptions::Dump
 
 void Options::Dump(Logger* log) const {
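diff --git a/options/options_helper.cc b/options/options_helper.cc
index 65eb708c16537d8bfa65b196771041bedb74c9a4..6af73c840d94cad70a8c5f390428781ecc90acaf 100644 (file)
--- a/options/options_helper.cc
+++ b/options/options_helper.cc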
@@ -303,6 +303,7 @@ void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions,
   cf_opts->cf_paths = ioptions.cf_paths;
   cf_opts->compaction_thread_limiter = ioptions.compaction_thread_limiter;
   cf_opts->sst_partitioner_factory = ioptions.sst_partitioner_factory;
+  cf_opts->blob_cache = ioptions.blob_cache;
 
   // TODO(yhchiang): find some way to handle the following derived options
   // * max_file_size
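diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc
index 944248da25948957e5a906a349b0e7ee9360ddf2..42a6fd577aa6b3884abf4dadde433a2a22ff1412 100644 (file)
--- a/options/options_settable_test.cc
+++ b/options/options_settable_test.cc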
@@ -377,7 +377,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
 // test is not updated accordingly.
 // After adding an option, we need to make sure it is settable by
 // GetColumnFamilyOptionsFromString() and add the option to the input
-// string passed to GetColumnFamilyOptionsFromString()in this test.
+// string passed to GetColumnFamilyOptionsFromString() in this test.
 // If it is a complicated type, you also need to add the field to
 // kColumnFamilyOptionsExcluded, and maybe add customized verification
 // for it.
@@ -400,6 +400,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
       {offsetof(struct ColumnFamilyOptions,
                 table_properties_collector_factories),
        sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)},
+      {offsetof(struct ColumnFamilyOptions, blob_cache),
+       sizeof(std::shared_ptr<Cache>)},
       {offsetof(struct ColumnFamilyOptions, comparator), sizeof(Comparator*)},
       {offsetof(struct ColumnFamilyOptions, merge_operator),
        sizeof(std::shared_ptr<MergeOperator>)},
@@ -523,9 +525,12 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
       "blob_file_starting_level=1;"
       "bottommost_temperature=kWarm;"
       "compaction_options_fifo={max_table_files_size=3;allow_"
-      "compaction=false;age_for_warm=1;};",
+      "compaction=false;age_for_warm=1;};"
+      "blob_cache=1M;",
       new_options));
 
+  ASSERT_NE(new_options->blob_cache.get(), nullptr);
+
   ASSERT_EQ(unset_bytes_base,
             NumUnsetBytes(new_options_ptr, sizeof(ColumnFamilyOptions),
                           kColumnFamilyOptionsExcluded));
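diff --git a/options/options_test.cc b/options/options_test.cc
index 1992e39a54ba4c0bfccd25fdefb71f62860deb18..7c688f290d7ff6bd3feabf6eb719b19c7ba0dfb8 100644 (file)
--- a/options/options_test.cc
+++ b/options/options_test.cc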
@@ -601,6 +601,22 @@ TEST_F(OptionsTest, GetColumnFamilyOptionsFromStringTest) {
   ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr);
   ASSERT_EQ(std::string(new_cf_opt.memtable_factory->Name()), "SkipListFactory");
   ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory"));
+
+  // blob cache
+  ASSERT_OK(GetColumnFamilyOptionsFromString(
+      config_options, base_cf_opt,
+      "blob_cache={capacity=1M;num_shard_bits=4;"
+      "strict_capacity_limit=true;high_pri_pool_ratio=0.5;};",
+      &new_cf_opt));
+  ASSERT_NE(new_cf_opt.blob_cache, nullptr);
+  ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL);
+  ASSERT_EQ(static_cast<ShardedCache*>(new_cf_opt.blob_cache.get())
+                ->GetNumShardBits(),
+            4);
+  ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true);
+  ASSERT_EQ(static_cast<LRUCache*>(new_cf_opt.blob_cache.get())
+                ->GetHighPriPoolRatio(),
+            0.5);
 }
 
 TEST_F(OptionsTest, CompressionOptionsFromString) {
@@ -2767,6 +2783,22 @@ TEST_F(OptionsOldApiTest, GetColumnFamilyOptionsFromStringTest) {
             &new_cf_opt));
   ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr);
   ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory"));
+
+  // blob cache
+  ASSERT_OK(GetColumnFamilyOptionsFromString(
+      base_cf_opt,
+      "blob_cache={capacity=1M;num_shard_bits=4;"
+      "strict_capacity_limit=true;high_pri_pool_ratio=0.5;};",
+      &new_cf_opt));
+  ASSERT_NE(new_cf_opt.blob_cache, nullptr);
+  ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL);
+  ASSERT_EQ(static_cast<ShardedCache*>(new_cf_opt.blob_cache.get())
+                ->GetNumShardBits(),
+            4);
+  ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true);
+  ASSERT_EQ(static_cast<LRUCache*>(new_cf_opt.blob_cache.get())
+                ->GetHighPriPoolRatio(),
+            0.5);
 }
 
 TEST_F(OptionsTest, SliceTransformCreateFromString) {
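diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index e119d1bc71cc29110f2b6c42fa09bc845be5d9db..31b75cf5ef95fcf6212a4b1d84bc67906b3e3765 100644 (file)
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc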
@@ -563,7 +563,7 @@ void BlockBasedTable::SetupBaseCacheKey(const TableProperties* properties,
   // assert(!db_id.empty());
 
   // Minimum block size is 5 bytes; therefore we can trim off two lower bits
-  // from offets. See GetCacheKey.
+  // from offsets. See GetCacheKey.
   *out_base_cache_key = OffsetableCacheKey(db_id, db_session_id, file_num,
                                            /*max_offset*/ file_size >> 2);
 }