]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Stable cache keys on ingested SST files (#8669)
authorPeter Dillinger <peterd@fb.com>
Wed, 18 Aug 2021 18:32:00 +0000 (11:32 -0700)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Wed, 18 Aug 2021 18:33:03 +0000 (11:33 -0700)
Summary:
Extends https://github.com/facebook/rocksdb/issues/8659 to work for ingested external SST files, even
the same file ingested into different DBs sharing a block cache.

Note: These new cache keys are currently only enabled when FileSystem
does not provide GetUniqueId. For now, they are typically larger,
so slightly less efficient.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8669

Test Plan: Extended unit test

Reviewed By: zhichao-cao

Differential Revision: D30398532

Pulled By: pdillinger

fbshipit-source-id: 1f13e2af4b8bfff5741953a69466e9589fbc23c7

db/db_block_cache_test.cc
db/db_impl/db_impl.cc
db/db_impl/db_impl.h
db/repair.cc
table/block_based/block_based_table_reader.cc
table/sst_file_writer.cc
util/defer.h

index c7c100f6341a38ca5b94e435b271f7e916f41f05..8d9a883b69706b92095f3f5e2e2d7e538a514676 100644 (file)
@@ -18,6 +18,7 @@
 #include "rocksdb/statistics.h"
 #include "rocksdb/table.h"
 #include "util/compression.h"
+#include "util/defer.h"
 #include "util/random.h"
 #include "utilities/fault_injection_fs.h"
 
@@ -1310,7 +1311,7 @@ class StableCacheKeyTestFS : public FaultInjectionTestFS {
     SetFailGetUniqueId(true);
   }
 
-  virtual ~StableCacheKeyTestFS() {}
+  virtual ~StableCacheKeyTestFS() override {}
 
   IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
                     IODebugContext*) override {
@@ -1342,16 +1343,17 @@ TEST_F(DBBlockCacheTest, StableCacheKeys) {
       table_options.no_block_cache = true;
       table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false);
       verify_stats = [&options] {
+        // One for ordinary SST file and one for external SST file
         ASSERT_EQ(
-            1, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD));
+            2, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD));
       };
     } else {
       table_options.cache_index_and_filter_blocks = true;
       table_options.block_cache = NewLRUCache(1 << 25, 0, false);
       verify_stats = [&options] {
-        ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
-        ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
-        ASSERT_EQ(1,
+        ASSERT_EQ(2, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
+        ASSERT_EQ(2, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
+        ASSERT_EQ(2,
                   options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
       };
     }
@@ -1360,18 +1362,41 @@ TEST_F(DBBlockCacheTest, StableCacheKeys) {
     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
     DestroyAndReopen(options);
 
+    // Ordinary SST file
     ASSERT_OK(Put("key1", "abc"));
     std::string something_compressible(500U, 'x');
-    ASSERT_OK(Put("key2", something_compressible));
+    ASSERT_OK(Put("key1a", something_compressible));
     ASSERT_OK(Flush());
 
+#ifndef ROCKSDB_LITE
+    // External SST file
+    std::string external = dbname_ + "/external.sst";
+    {
+      SstFileWriter sst_file_writer(EnvOptions(), options);
+      ASSERT_OK(sst_file_writer.Open(external));
+      ASSERT_OK(sst_file_writer.Put("key2", "abc"));
+      ASSERT_OK(sst_file_writer.Put("key2a", something_compressible));
+      ExternalSstFileInfo external_info;
+      ASSERT_OK(sst_file_writer.Finish(&external_info));
+      IngestExternalFileOptions ingest_opts;
+      ASSERT_OK(db_->IngestExternalFile({external}, ingest_opts));
+    }
+#else
+    // Another ordinary SST file
+    ASSERT_OK(Put("key2", "abc"));
+    ASSERT_OK(Put("key2a", something_compressible));
+    ASSERT_OK(Flush());
+#endif
+
     ASSERT_EQ(Get("key1"), std::string("abc"));
+    ASSERT_EQ(Get("key2"), std::string("abc"));
     verify_stats();
 
     // Make sure we can cache hit after re-open
     Reopen(options);
 
     ASSERT_EQ(Get("key1"), std::string("abc"));
+    ASSERT_EQ(Get("key2"), std::string("abc"));
     verify_stats();
 
     // Make sure we can cache hit even on a full copy of the DB. Using
@@ -1386,14 +1411,26 @@ TEST_F(DBBlockCacheTest, StableCacheKeys) {
 
     Close();
     Destroy(options);
-    dbname_ = db_copy_name;
+    SaveAndRestore<std::string> save_dbname(&dbname_, db_copy_name);
     Reopen(options);
 
     ASSERT_EQ(Get("key1"), std::string("abc"));
+    ASSERT_EQ(Get("key2"), std::string("abc"));
+    verify_stats();
+
+    // And ensure that re-ingesting the same external file into a different DB
+    // uses same cache keys
+    DestroyAndReopen(options);
+
+    IngestExternalFileOptions ingest_opts;
+    ASSERT_OK(db_->IngestExternalFile({external}, ingest_opts));
+
+    ASSERT_EQ(Get("key2"), std::string("abc"));
     verify_stats();
 #endif  // !ROCKSDB_LITE
 
     Close();
+    Destroy(options);
   }
 }
 
index 20c3c2bd426352cad46c20b1b6e49df252949041..d617fe3e7de7205b10024c5c05554f12d0dc1558 100644 (file)
@@ -3945,10 +3945,10 @@ Status DBImpl::GetDbSessionId(std::string& session_id) const {
   return Status::OK();
 }
 
-void DBImpl::SetDbSessionId() {
+std::string DBImpl::GenerateDbSessionId(Env* env) {
   // GenerateUniqueId() generates an identifier that has a negligible
   // probability of being duplicated, ~128 bits of entropy
-  std::string uuid = env_->GenerateUniqueId();
+  std::string uuid = env->GenerateUniqueId();
 
   // Hash and reformat that down to a more compact format, 20 characters
   // in base-36 ([0-9A-Z]), which is ~103 bits of entropy, which is enough
@@ -3959,15 +3959,21 @@ void DBImpl::SetDbSessionId() {
   // * Visually distinct from DB id format
   uint64_t a = NPHash64(uuid.data(), uuid.size(), 1234U);
   uint64_t b = NPHash64(uuid.data(), uuid.size(), 5678U);
-  db_session_id_.resize(20);
+  std::string db_session_id;
+  db_session_id.resize(20);
   static const char* const base36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
   size_t i = 0;
   for (; i < 10U; ++i, a /= 36U) {
-    db_session_id_[i] = base36[a % 36];
+    db_session_id[i] = base36[a % 36];
   }
   for (; i < 20U; ++i, b /= 36U) {
-    db_session_id_[i] = base36[b % 36];
+    db_session_id[i] = base36[b % 36];
   }
+  return db_session_id;
+}
+
+void DBImpl::SetDbSessionId() {
+  db_session_id_ = GenerateDbSessionId(env_);
   TEST_SYNC_POINT_CALLBACK("DBImpl::SetDbSessionId", &db_session_id_);
 }
 
index c4bb5b68b60e41d1dfaffbc6ab223851c13b9086..68107a0e8761d03588ff58b481698ddd8bbce016 100644 (file)
@@ -1114,6 +1114,8 @@ class DBImpl : public DB {
     State state_;
   };
 
+  static std::string GenerateDbSessionId(Env* env);
+
  protected:
   const std::string dbname_;
   std::string db_id_;
index 7eaf8adcc66099c0072dd4f438ff19d3117bb533..3efe63dfcc5363e59f13469238ea3ac142469044 100644 (file)
@@ -93,6 +93,7 @@ class Repairer {
            const ColumnFamilyOptions& default_cf_opts,
            const ColumnFamilyOptions& unknown_cf_opts, bool create_unknown_cfs)
       : dbname_(dbname),
+        db_session_id_(DBImpl::GenerateDbSessionId(db_options.env)),
         env_(db_options.env),
         file_options_(),
         db_options_(SanitizeOptions(dbname_, db_options)),
@@ -109,21 +110,16 @@ class Repairer {
             // TableCache can be small since we expect each table to be opened
             // once.
             NewLRUCache(10, db_options_.table_cache_numshardbits)),
-        table_cache_(
-            // TODO: db_session_id for TableCache should be initialized after
-            // db_session_id_ is set.
-            new TableCache(default_iopts_, &file_options_,
-                           raw_table_cache_.get(),
-                           /*block_cache_tracer=*/nullptr,
-                           /*io_tracer=*/nullptr, /*db_session_id*/ "")),
+        table_cache_(new TableCache(default_iopts_, &file_options_,
+                                    raw_table_cache_.get(),
+                                    /*block_cache_tracer=*/nullptr,
+                                    /*io_tracer=*/nullptr, db_session_id_)),
         wb_(db_options_.db_write_buffer_size),
         wc_(db_options_.delayed_write_rate),
-        // TODO: db_session_id for VersionSet should be initialized after
-        // db_session_id_ is set and use it for initialization.
         vset_(dbname_, &immutable_db_options_, file_options_,
               raw_table_cache_.get(), &wb_, &wc_,
               /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
-              /*db_session_id*/ ""),
+              db_session_id_),
         next_file_number_(1),
         db_lock_(nullptr),
         closed_(false) {
@@ -198,10 +194,6 @@ class Repairer {
       }
       // Just create a DBImpl temporarily so we can reuse NewDB()
       db_impl = new DBImpl(db_options_, dbname_);
-      // Also use this temp DBImpl to get a session id
-      status = db_impl->GetDbSessionId(db_session_id_);
-    }
-    if (status.ok()) {
       status = db_impl->NewDB(/*new_filenames=*/nullptr);
     }
     delete db_impl;
index 5c020084d62c835258fdece2f72af505177eb30b..c9ee60aeb9060b282ecdb7d42bb39f48d8b5de0a 100644 (file)
@@ -649,8 +649,6 @@ Status BlockBasedTable::Open(
       // under these portable/stable keys.
       // Note: For now, each external SST file gets its own unique session id,
       // so we can use a fixed file number under than session id.
-      // ... except FIXME (peterd): sst_file_writer currently uses wrong
-      // format for db_session_ids so this approach doesn't work yet.
       db_session_id = rep->table_properties->db_session_id;
       file_num = 1;
     }
index 0d46c764bb40f866ac22aea5059ad349bca66ad5..0ce8f69314edcc022a457b143836a1208d2fa1d9 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <vector>
 
+#include "db/db_impl/db_impl.h"
 #include "db/dbformat.h"
 #include "file/writable_file_writer.h"
 #include "rocksdb/file_system.h"
@@ -245,7 +246,7 @@ Status SstFileWriter::Open(const std::string& file_path) {
   // Here we mimic the way db_session_id behaves by resetting the db_session_id
   // every time SstFileWriter is used, and in this case db_id is set to be "SST
   // Writer".
-  std::string db_session_id = r->ioptions.env->GenerateUniqueId();
+  std::string db_session_id = DBImpl::GenerateDbSessionId(r->ioptions.env);
   if (!db_session_id.empty() && db_session_id.back() == '\n') {
     db_session_id.pop_back();
   }
index 81b0df255c831719e930996bea40d033baf90cff..a781931733da880e3a6b916c3b39daed7b360120 100644 (file)
@@ -57,6 +57,14 @@ class SaveAndRestore {
  public:
   // obj is non-null pointer to value to be saved and later restored.
   explicit SaveAndRestore(T* obj) : obj_(obj), saved_(*obj) {}
+  // new_value is stored in *obj
+  SaveAndRestore(T* obj, const T& new_value)
+      : obj_(obj), saved_(std::move(*obj)) {
+    *obj = new_value;
+  }
+  SaveAndRestore(T* obj, T&& new_value) : obj_(obj), saved_(std::move(*obj)) {
+    *obj = std::move(new_value);
+  }
   ~SaveAndRestore() { *obj_ = std::move(saved_); }
 
   // No copies