git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Revert "Remove PlainTable's feature store_index_in_file (#4914)" (#5034)
author Siying Dong <siying.d@fb.com>
Fri, 1 Mar 2019 23:41:55 +0000 (15:41 -0800)
committer Fosco Marotto <fjm@fb.com>
Mon, 4 Mar 2019 18:37:59 +0000 (10:37 -0800)
Summary:
This reverts commit ee1818081ff4ca2a49a48cb4ca5b97665b8dcddf.

We are not ready to deprecate this feature. Revert it for now.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5034

Differential Revision: D14287246

Pulled By: siying

fbshipit-source-id: e4beafdeaee1c94364fdaa6ba198218d158339f7

12 files changed:
HISTORY.md
db/plain_table_db_test.cc
include/rocksdb/table.h
java/rocksjni/table.cc
java/src/main/java/org/rocksdb/PlainTableConfig.java
java/src/test/java/org/rocksdb/PlainTableConfigTest.java
options/options_test.cc
table/plain_table_builder.cc
table/plain_table_builder.h
table/plain_table_factory.cc
table/plain_table_factory.h
table/plain_table_reader.cc

index 14502d37ac7387225f87fa7782abe43faeeecf29..78e3cc88bebf075dc3a4efb98e86f1d706a7af9a 100644 (file)
@@ -25,7 +25,6 @@
 * With "ldb ----try_load_options", when wal_dir specified by the option file doesn't exist, ignore it.
 * Change time resolution in FileOperationInfo.
 * Deleting Blob files also go through SStFileManager.
-* Remove PlainTable's store_index_in_file feature. When opening an existing DB with index in SST files, the index and bloom filter will still be rebuild while SST files are opened, in the same way as there is no index in the file.
 * Remove CuckooHash memtable.
 * The counter stat `number.block.not_compressed` now also counts blocks not compressed due to poor compression ratio.
 * Remove ttl option from `CompactionOptionsFIFO`. The option has been deprecated and ttl in `ColumnFamilyOptions` is used instead.
index 6c9057164ded07c71e5234ea322a92ab330136b5..2dd0cff0b41e317fbe0626fb57158dc1a9478039 100644 (file)
@@ -134,6 +134,7 @@ class PlainTableDBTest : public testing::Test,
     plain_table_options.huge_page_tlb_size = 0;
     plain_table_options.encoding_type = kPrefix;
     plain_table_options.full_scan_mode = false;
+    plain_table_options.store_index_in_file = false;
 
     options.table_factory.reset(NewPlainTableFactory(plain_table_options));
     options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true));
@@ -271,7 +272,8 @@ class TestPlainTableReader : public PlainTableReader {
                        std::unique_ptr<RandomAccessFileReader>&& file,
                        const ImmutableCFOptions& ioptions,
                        const SliceTransform* prefix_extractor,
-                       bool* expect_bloom_not_match, uint32_t column_family_id,
+                       bool* expect_bloom_not_match, bool store_index_in_file,
+                       uint32_t column_family_id,
                        const std::string& column_family_name)
       : PlainTableReader(ioptions, std::move(file), env_options, icomparator,
                          encoding_type, file_size, table_properties,
@@ -288,6 +290,17 @@ class TestPlainTableReader : public PlainTableReader {
     TableProperties* props = const_cast<TableProperties*>(table_properties);
     EXPECT_EQ(column_family_id, static_cast<uint32_t>(props->column_family_id));
     EXPECT_EQ(column_family_name, props->column_family_name);
+    if (store_index_in_file) {
+      auto bloom_version_ptr = props->user_collected_properties.find(
+          PlainTablePropertyNames::kBloomVersion);
+      EXPECT_TRUE(bloom_version_ptr != props->user_collected_properties.end());
+      EXPECT_EQ(bloom_version_ptr->second, std::string("1"));
+      if (ioptions.bloom_locality > 0) {
+        auto num_blocks_ptr = props->user_collected_properties.find(
+            PlainTablePropertyNames::kNumBloomBlocks);
+        EXPECT_TRUE(num_blocks_ptr != props->user_collected_properties.end());
+      }
+    }
   }
 
   ~TestPlainTableReader() override {}
@@ -316,6 +329,7 @@ class TestPlainTableFactory : public PlainTableFactory {
         bloom_bits_per_key_(options.bloom_bits_per_key),
         hash_table_ratio_(options.hash_table_ratio),
         index_sparseness_(options.index_sparseness),
+        store_index_in_file_(options.store_index_in_file),
         expect_bloom_not_match_(expect_bloom_not_match),
         column_family_id_(column_family_id),
         column_family_name_(std::move(column_family_name)) {}
@@ -332,6 +346,22 @@ class TestPlainTableFactory : public PlainTableFactory {
                             true /* compression_type_missing */);
     EXPECT_TRUE(s.ok());
 
+    if (store_index_in_file_) {
+      BlockHandle bloom_block_handle;
+      s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber,
+                        table_reader_options.ioptions,
+                        BloomBlockBuilder::kBloomBlock, &bloom_block_handle,
+                        /* compression_type_missing */ true);
+      EXPECT_TRUE(s.ok());
+
+      BlockHandle index_block_handle;
+      s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber,
+                        table_reader_options.ioptions,
+                        PlainTableIndexBuilder::kPlainTableIndexBlock,
+                        &index_block_handle, /* compression_type_missing */ true);
+      EXPECT_TRUE(s.ok());
+    }
+
     auto& user_props = props->user_collected_properties;
     auto encoding_type_prop =
         user_props.find(PlainTablePropertyNames::kEncodingType);
@@ -345,7 +375,7 @@ class TestPlainTableFactory : public PlainTableFactory {
         bloom_bits_per_key_, hash_table_ratio_, index_sparseness_, props,
         std::move(file), table_reader_options.ioptions,
         table_reader_options.prefix_extractor, expect_bloom_not_match_,
-        column_family_id_, column_family_name_));
+        store_index_in_file_, column_family_id_, column_family_name_));
 
     *table = std::move(new_reader);
     return s;
@@ -355,6 +385,7 @@ class TestPlainTableFactory : public PlainTableFactory {
   int bloom_bits_per_key_;
   double hash_table_ratio_;
   size_t index_sparseness_;
+  bool store_index_in_file_;
   bool* expect_bloom_not_match_;
   const uint32_t column_family_id_;
   const std::string column_family_name_;
@@ -364,8 +395,10 @@ TEST_P(PlainTableDBTest, Flush) {
   for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
        huge_page_tlb_size += 2 * 1024 * 1024) {
     for (EncodingType encoding_type : {kPlain, kPrefix}) {
-      for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
-        for (int total_order = 0; total_order <= 1; total_order++) {
+    for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
+      for (int total_order = 0; total_order <= 1; total_order++) {
+        for (int store_index_in_file = 0; store_index_in_file <= 1;
+             ++store_index_in_file) {
           Options options = CurrentOptions();
           options.create_if_missing = true;
           // Set only one bucket to force bucket conflict.
@@ -381,6 +414,7 @@ TEST_P(PlainTableDBTest, Flush) {
             plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
             plain_table_options.encoding_type = encoding_type;
             plain_table_options.full_scan_mode = false;
+            plain_table_options.store_index_in_file = store_index_in_file;
 
             options.table_factory.reset(
                 NewPlainTableFactory(plain_table_options));
@@ -393,6 +427,7 @@ TEST_P(PlainTableDBTest, Flush) {
             plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
             plain_table_options.encoding_type = encoding_type;
             plain_table_options.full_scan_mode = false;
+            plain_table_options.store_index_in_file = store_index_in_file;
 
             options.table_factory.reset(
                 NewPlainTableFactory(plain_table_options));
@@ -418,15 +453,22 @@ TEST_P(PlainTableDBTest, Flush) {
           auto row = ptc.begin();
           auto tp = row->second;
 
-          ASSERT_EQ(total_order ? "4" : "12",
-                    (tp->user_collected_properties)
-                        .at("plain_table_hash_table_size"));
-          ASSERT_EQ(
-              "0",
-              (tp->user_collected_properties).at("plain_table_sub_index_size"));
+          if (!store_index_in_file) {
+            ASSERT_EQ(total_order ? "4" : "12",
+                      (tp->user_collected_properties)
+                          .at("plain_table_hash_table_size"));
+            ASSERT_EQ("0", (tp->user_collected_properties)
+                               .at("plain_table_sub_index_size"));
+          } else {
+            ASSERT_EQ("0", (tp->user_collected_properties)
+                               .at("plain_table_hash_table_size"));
+            ASSERT_EQ("0", (tp->user_collected_properties)
+                               .at("plain_table_sub_index_size"));
+          }
           ASSERT_EQ("v3", Get("1000000000000foo"));
           ASSERT_EQ("v2", Get("0000000000000bar"));
         }
+        }
       }
     }
   }
@@ -436,11 +478,19 @@ TEST_P(PlainTableDBTest, Flush2) {
   for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
        huge_page_tlb_size += 2 * 1024 * 1024) {
     for (EncodingType encoding_type : {kPlain, kPrefix}) {
-      for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
-        for (int total_order = 0; total_order <= 1; total_order++) {
+    for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
+      for (int total_order = 0; total_order <= 1; total_order++) {
+        for (int store_index_in_file = 0; store_index_in_file <= 1;
+             ++store_index_in_file) {
           if (encoding_type == kPrefix && total_order) {
             continue;
           }
+          if (!bloom_bits && store_index_in_file) {
+            continue;
+          }
+          if (total_order && store_index_in_file) {
+          continue;
+        }
         bool expect_bloom_not_match = false;
         Options options = CurrentOptions();
         options.create_if_missing = true;
@@ -459,6 +509,7 @@ TEST_P(PlainTableDBTest, Flush2) {
         plain_table_options.bloom_bits_per_key = bloom_bits;
         plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
         plain_table_options.encoding_type = encoding_type;
+        plain_table_options.store_index_in_file = store_index_in_file;
         options.table_factory.reset(new TestPlainTableFactory(
             &expect_bloom_not_match, plain_table_options,
             0 /* column_family_id */, kDefaultColumnFamilyName));
@@ -497,8 +548,9 @@ TEST_P(PlainTableDBTest, Flush2) {
           }
           expect_bloom_not_match = false;
         }
-        }
       }
+      }
+    }
     }
   }
 }
index 100586d4ea603a7033e93832a1ca3ef7aed34836..8d97a60e3a8d54d99b364f765b4921f3ffda03ae 100644 (file)
@@ -351,11 +351,10 @@ struct PlainTableOptions {
   //                  using the index.
   bool full_scan_mode = false;
 
-  // THIS FEATURE IS REMOVED.
   // @store_index_in_file: compute plain table index and bloom filter during
   //                       file building and store it in file. When reading
   //                       file, index will be mmaped instead of recomputation.
-  // bool store_index_in_file = false;
+  bool store_index_in_file = false;
 };
 
 // -- Plain Table with prefix-only seek
index 7497317d95025837562844462c61542821bb1cd5..3dbd13280ad13668b12307e3d1aba153c51bf767 100644 (file)
@@ -20,7 +20,8 @@
 jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle(
     JNIEnv * /*env*/, jobject /*jobj*/, jint jkey_size,
     jint jbloom_bits_per_key, jdouble jhash_table_ratio, jint jindex_sparseness,
-    jint jhuge_page_tlb_size, jbyte jencoding_type, jboolean jfull_scan_mode) {
+    jint jhuge_page_tlb_size, jbyte jencoding_type, jboolean jfull_scan_mode,
+    jboolean jstore_index_in_file) {
   rocksdb::PlainTableOptions options = rocksdb::PlainTableOptions();
   options.user_key_len = jkey_size;
   options.bloom_bits_per_key = jbloom_bits_per_key;
@@ -29,6 +30,7 @@ jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle(
   options.huge_page_tlb_size = jhuge_page_tlb_size;
   options.encoding_type = static_cast<rocksdb::EncodingType>(jencoding_type);
   options.full_scan_mode = jfull_scan_mode;
+  options.store_index_in_file = jstore_index_in_file;
   return reinterpret_cast<jlong>(rocksdb::NewPlainTableFactory(options));
 }
 
index 7ebfaf1b9e19701c1bae38c8fbccfe1b70c8f7a4..c099981678b7a44719bb284e496ed3fa24b4b29c 100644 (file)
@@ -21,6 +21,8 @@ public class PlainTableConfig extends TableFormatConfig {
   public static final EncodingType DEFAULT_ENCODING_TYPE =
       EncodingType.kPlain;
   public static final boolean DEFAULT_FULL_SCAN_MODE = false;
+  public static final boolean DEFAULT_STORE_INDEX_IN_FILE
+      = false;
 
   public PlainTableConfig() {
     keySize_ = VARIABLE_LENGTH;
@@ -30,6 +32,7 @@ public class PlainTableConfig extends TableFormatConfig {
     hugePageTlbSize_ = DEFAULT_HUGE_TLB_SIZE;
     encodingType_ = DEFAULT_ENCODING_TYPE;
     fullScanMode_ = DEFAULT_FULL_SCAN_MODE;
+    storeIndexInFile_ = DEFAULT_STORE_INDEX_IN_FILE;
   }
 
   /**
@@ -208,10 +211,9 @@ public class PlainTableConfig extends TableFormatConfig {
    * @param storeIndexInFile value indicating if index shall
    *     be stored in a file
    * @return the reference to the current config.
-   * @deprecated
    */
-  @Deprecated
   public PlainTableConfig setStoreIndexInFile(boolean storeIndexInFile) {
+    this.storeIndexInFile_ = storeIndexInFile;
     return this;
   }
 
@@ -220,20 +222,23 @@ public class PlainTableConfig extends TableFormatConfig {
    * in a file.
    *
    * @return currently set value for store index in file.
-   * @deprecated
    */
-  @Deprecated
   public boolean storeIndexInFile() {
-    return false;
+    return storeIndexInFile_;
   }
 
   @Override protected long newTableFactoryHandle() {
-    return newTableFactoryHandle(keySize_, bloomBitsPerKey_, hashTableRatio_, indexSparseness_,
-        hugePageTlbSize_, encodingType_.getValue(), fullScanMode_);
+    return newTableFactoryHandle(keySize_, bloomBitsPerKey_,
+        hashTableRatio_, indexSparseness_, hugePageTlbSize_,
+        encodingType_.getValue(), fullScanMode_,
+        storeIndexInFile_);
   }
 
-  private native long newTableFactoryHandle(int keySize, int bloomBitsPerKey, double hashTableRatio,
-      int indexSparseness, int hugePageTlbSize, byte encodingType, boolean fullScanMode);
+  private native long newTableFactoryHandle(
+      int keySize, int bloomBitsPerKey,
+      double hashTableRatio, int indexSparseness,
+      int hugePageTlbSize, byte encodingType,
+      boolean fullScanMode, boolean storeIndexInFile);
 
   private int keySize_;
   private int bloomBitsPerKey_;
@@ -242,4 +247,5 @@ public class PlainTableConfig extends TableFormatConfig {
   private int hugePageTlbSize_;
   private EncodingType encodingType_;
   private boolean fullScanMode_;
+  private boolean storeIndexInFile_;
 }
index 3dafef4c766e34605b23be472a307c4757ce0370..dcb6cc39f89268b7cd9c9f15e07577fca6bdfd9b 100644 (file)
@@ -70,6 +70,14 @@ public class PlainTableConfigTest {
     plainTableConfig.setFullScanMode(true);
     assertThat(plainTableConfig.fullScanMode()).isTrue();  }
 
+  @Test
+  public void storeIndexInFile() {
+    PlainTableConfig plainTableConfig = new PlainTableConfig();
+    plainTableConfig.setStoreIndexInFile(true);
+    assertThat(plainTableConfig.storeIndexInFile()).
+        isTrue();
+  }
+
   @Test
   public void plainTableConfig() {
     try(final Options opt = new Options()) {
index f700d8d65d10f72741f07c34278ea4e0ae18017b..b7076c8bdc5c2cf6c1863a171bc03538d4c9675e 100644 (file)
@@ -662,6 +662,7 @@ TEST_F(OptionsTest, GetPlainTableOptionsFromString) {
   ASSERT_EQ(new_opt.huge_page_tlb_size, 4);
   ASSERT_EQ(new_opt.encoding_type, EncodingType::kPrefix);
   ASSERT_TRUE(new_opt.full_scan_mode);
+  ASSERT_TRUE(new_opt.store_index_in_file);
 
   // unknown option
   ASSERT_NOK(GetPlainTableOptionsFromString(table_opt,
index 1b1058b4c116a0c73bf3427972890b4b1fc9691d..453b6c768b548e69203030f33047e2a63d23f531 100644 (file)
@@ -20,6 +20,7 @@
 #include "table/plain_table_factory.h"
 #include "db/dbformat.h"
 #include "table/block_builder.h"
+#include "table/bloom_block.h"
 #include "table/plain_table_index.h"
 #include "table/format.h"
 #include "table/meta_blocks.h"
@@ -61,17 +62,34 @@ PlainTableBuilder::PlainTableBuilder(
         int_tbl_prop_collector_factories,
     uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_len,
     EncodingType encoding_type, size_t index_sparseness,
-    const std::string& column_family_name)
+    uint32_t bloom_bits_per_key, const std::string& column_family_name,
+    uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio,
+    bool store_index_in_file)
     : ioptions_(ioptions),
       moptions_(moptions),
+      bloom_block_(num_probes),
       file_(file),
+      bloom_bits_per_key_(bloom_bits_per_key),
+      huge_page_tlb_size_(huge_page_tlb_size),
       encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(),
                index_sparseness),
+      store_index_in_file_(store_index_in_file),
       prefix_extractor_(moptions.prefix_extractor.get()) {
+  // Build index block and save it in the file if hash_table_ratio > 0
+  if (store_index_in_file_) {
+    assert(hash_table_ratio > 0 || IsTotalOrderMode());
+    index_builder_.reset(new PlainTableIndexBuilder(
+        &arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness,
+        hash_table_ratio, huge_page_tlb_size_));
+    properties_.user_collected_properties
+        [PlainTablePropertyNames::kBloomVersion] = "1";  // For future use
+  }
+
   properties_.fixed_key_len = user_key_len;
 
   // for plain table, we put all the data in a big chuck.
   properties_.num_data_blocks = 1;
+  // Fill it later if store_index_in_file_ == true
   properties_.index_size = 0;
   properties_.filter_size = 0;
   // To support roll-back to previous version, now still use version 0 for
@@ -112,11 +130,26 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
     return;
   }
 
+  // Store key hash
+  if (store_index_in_file_) {
+    if (moptions_.prefix_extractor == nullptr) {
+      keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key));
+    } else {
+      Slice prefix =
+          moptions_.prefix_extractor->Transform(internal_key.user_key);
+      keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix));
+    }
+  }
+
   // Write value
   assert(offset_ <= std::numeric_limits<uint32_t>::max());
+  auto prev_offset = static_cast<uint32_t>(offset_);
   // Write out the key
   encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf,
                      &meta_bytes_buf_size);
+  if (SaveIndexInFile()) {
+    index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset);
+  }
 
   // Write value length
   uint32_t value_size = static_cast<uint32_t>(value.size());
@@ -162,6 +195,46 @@ Status PlainTableBuilder::Finish() {
 
   MetaIndexBuilder meta_index_builer;
 
+  if (store_index_in_file_ && (properties_.num_entries > 0)) {
+    assert(properties_.num_entries <= std::numeric_limits<uint32_t>::max());
+    Status s;
+    BlockHandle bloom_block_handle;
+    if (bloom_bits_per_key_ > 0) {
+      bloom_block_.SetTotalBits(
+          &arena_,
+          static_cast<uint32_t>(properties_.num_entries) * bloom_bits_per_key_,
+          ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.info_log);
+
+      PutVarint32(&properties_.user_collected_properties
+                       [PlainTablePropertyNames::kNumBloomBlocks],
+                  bloom_block_.GetNumBlocks());
+
+      bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_);
+
+      Slice bloom_finish_result = bloom_block_.Finish();
+
+      properties_.filter_size = bloom_finish_result.size();
+      s = WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle);
+
+      if (!s.ok()) {
+        return s;
+      }
+      meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle);
+    }
+    BlockHandle index_block_handle;
+    Slice index_finish_result = index_builder_->Finish();
+
+    properties_.index_size = index_finish_result.size();
+    s = WriteBlock(index_finish_result, file_, &offset_, &index_block_handle);
+
+    if (!s.ok()) {
+      return s;
+    }
+
+    meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock,
+                          index_block_handle);
+  }
+
   // Calculate bloom block size and index block size
   PropertyBlockBuilder property_block_builder;
   // -- Add basic properties
index 5a0be80b6419d52ff1076e17155a99b5f6b4a1c8..ca0879a4e1da75152696c2de616e069a6e17f57c 100644 (file)
@@ -12,6 +12,8 @@
 #include "rocksdb/status.h"
 #include "rocksdb/table.h"
 #include "rocksdb/table_properties.h"
+#include "table/bloom_block.h"
+#include "table/plain_table_index.h"
 #include "table/plain_table_key_coding.h"
 #include "table/table_builder.h"
 
@@ -35,7 +37,10 @@ class PlainTableBuilder: public TableBuilder {
           int_tbl_prop_collector_factories,
       uint32_t column_family_id, WritableFileWriter* file,
       uint32_t user_key_size, EncodingType encoding_type,
-      size_t index_sparseness, const std::string& column_family_name);
+      size_t index_sparseness, uint32_t bloom_bits_per_key,
+      const std::string& column_family_name, uint32_t num_probes = 6,
+      size_t huge_page_tlb_size = 0, double hash_table_ratio = 0,
+      bool store_index_in_file = false);
 
   // REQUIRES: Either Finish() or Abandon() has been called.
   ~PlainTableBuilder();
@@ -69,6 +74,8 @@ class PlainTableBuilder: public TableBuilder {
 
   TableProperties GetTableProperties() const override { return properties_; }
 
+  bool SaveIndexInFile() const { return store_index_in_file_; }
+
  private:
   Arena arena_;
   const ImmutableCFOptions& ioptions_;
@@ -76,12 +83,19 @@ class PlainTableBuilder: public TableBuilder {
   std::vector<std::unique_ptr<IntTblPropCollector>>
       table_properties_collectors_;
 
+  BloomBlockBuilder bloom_block_;
+  std::unique_ptr<PlainTableIndexBuilder> index_builder_;
+
   WritableFileWriter* file_;
   uint64_t offset_ = 0;
+  uint32_t bloom_bits_per_key_;
+  size_t huge_page_tlb_size_;
   Status status_;
   TableProperties properties_;
   PlainTableKeyEncoder encoder_;
 
+  bool store_index_in_file_;
+
   std::vector<uint32_t> keys_or_prefixes_hashes_;
   bool closed_ = false;  // Either Finish() or Abandon() has been called.
 
index a27f22edf29e050f91c294a036fadce05f79bf81..a6e59c142fd2c8f866e33348d8cd6975dfa57f01 100644 (file)
@@ -42,8 +42,10 @@ TableBuilder* PlainTableFactory::NewTableBuilder(
       table_builder_options.ioptions, table_builder_options.moptions,
       table_builder_options.int_tbl_prop_collector_factories, column_family_id,
       file, table_options_.user_key_len, table_options_.encoding_type,
-      table_options_.index_sparseness,
-      table_builder_options.column_family_name);
+      table_options_.index_sparseness, table_options_.bloom_bits_per_key,
+      table_builder_options.column_family_name, 6,
+      table_options_.huge_page_tlb_size, table_options_.hash_table_ratio,
+      table_options_.store_index_in_file);
 }
 
 std::string PlainTableFactory::GetPrintableTableOptions() const {
@@ -55,15 +57,27 @@ std::string PlainTableFactory::GetPrintableTableOptions() const {
   snprintf(buffer, kBufferSize, "  user_key_len: %u\n",
            table_options_.user_key_len);
   ret.append(buffer);
+  snprintf(buffer, kBufferSize, "  bloom_bits_per_key: %d\n",
+           table_options_.bloom_bits_per_key);
+  ret.append(buffer);
+  snprintf(buffer, kBufferSize, "  hash_table_ratio: %lf\n",
+           table_options_.hash_table_ratio);
+  ret.append(buffer);
   snprintf(buffer, kBufferSize, "  index_sparseness: %" ROCKSDB_PRIszt "\n",
            table_options_.index_sparseness);
   ret.append(buffer);
+  snprintf(buffer, kBufferSize, "  huge_page_tlb_size: %" ROCKSDB_PRIszt "\n",
+           table_options_.huge_page_tlb_size);
+  ret.append(buffer);
   snprintf(buffer, kBufferSize, "  encoding_type: %d\n",
            table_options_.encoding_type);
   ret.append(buffer);
   snprintf(buffer, kBufferSize, "  full_scan_mode: %d\n",
            table_options_.full_scan_mode);
   ret.append(buffer);
+  snprintf(buffer, kBufferSize, "  store_index_in_file: %d\n",
+           table_options_.store_index_in_file);
+  ret.append(buffer);
   return ret;
 }
 
index 4d7e98ca50db70008bb64743d40a038663d51eb5..157e3acda010824e2aad8f73fa43cf226e0471e6 100644 (file)
@@ -204,7 +204,8 @@ static std::unordered_map<std::string, OptionTypeInfo> plain_table_type_info = {
      {offsetof(struct PlainTableOptions, full_scan_mode), OptionType::kBoolean,
       OptionVerificationType::kNormal, false, 0}},
     {"store_index_in_file",
-     {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, false, 0}}};
+     {offsetof(struct PlainTableOptions, store_index_in_file),
+      OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}}};
 
 }  // namespace rocksdb
 #endif  // ROCKSDB_LITE
index 62bc906feb13f3d129c0b2a318bfa82999a7de90..5085edf1eff786239d96e3d8c77f0ef9f30ce0a4 100644 (file)
@@ -294,8 +294,47 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
   assert(props != nullptr);
   table_properties_.reset(props);
 
-  // index_in_file and bloom_in_file features are deprecated.
-  // Even if they exist in file, ignore them and always reconstruct.
+  BlockContents index_block_contents;
+  Status s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
+                           file_size_, kPlainTableMagicNumber, ioptions_,
+                           PlainTableIndexBuilder::kPlainTableIndexBlock,
+                           &index_block_contents,
+                           true /* compression_type_missing */);
+
+  bool index_in_file = s.ok();
+
+  BlockContents bloom_block_contents;
+  bool bloom_in_file = false;
+  // We only need to read the bloom block if index block is in file.
+  if (index_in_file) {
+    s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
+                      file_size_, kPlainTableMagicNumber, ioptions_,
+                      BloomBlockBuilder::kBloomBlock, &bloom_block_contents,
+                      true /* compression_type_missing */);
+    bloom_in_file = s.ok() && bloom_block_contents.data.size() > 0;
+  }
+
+  Slice* bloom_block;
+  if (bloom_in_file) {
+    // If bloom_block_contents.allocation is not empty (which will be the case
+    // for non-mmap mode), it holds the alloated memory for the bloom block.
+    // It needs to be kept alive to keep `bloom_block` valid.
+    bloom_block_alloc_ = std::move(bloom_block_contents.allocation);
+    bloom_block = &bloom_block_contents.data;
+  } else {
+    bloom_block = nullptr;
+  }
+
+  Slice* index_block;
+  if (index_in_file) {
+    // If index_block_contents.allocation is not empty (which will be the case
+    // for non-mmap mode), it holds the alloated memory for the index block.
+    // It needs to be kept alive to keep `index_block` valid.
+    index_block_alloc_ = std::move(index_block_contents.allocation);
+    index_block = &index_block_contents.data;
+  } else {
+    index_block = nullptr;
+  }
 
   if ((prefix_extractor_ == nullptr) && (hash_table_ratio != 0)) {
     // moptions.prefix_extractor is requried for a hash-based look-up.
@@ -308,36 +347,77 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
   // offset) and append it to IndexRecordList, which is a data structure created
   // to store them.
 
-  // Allocate bloom filter here for total order mode.
-  if (IsTotalOrderMode()) {
-    uint32_t num_bloom_bits =
-        static_cast<uint32_t>(table_properties_->num_entries) *
-        bloom_bits_per_key;
-    if (num_bloom_bits > 0) {
-      enable_bloom_ = true;
-      bloom_.SetTotalBits(&arena_, num_bloom_bits, ioptions_.bloom_locality,
-                          huge_page_tlb_size, ioptions_.info_log);
+  if (!index_in_file) {
+    // Allocate bloom filter here for total order mode.
+    if (IsTotalOrderMode()) {
+      uint32_t num_bloom_bits =
+          static_cast<uint32_t>(table_properties_->num_entries) *
+          bloom_bits_per_key;
+      if (num_bloom_bits > 0) {
+        enable_bloom_ = true;
+        bloom_.SetTotalBits(&arena_, num_bloom_bits, ioptions_.bloom_locality,
+                            huge_page_tlb_size, ioptions_.info_log);
+      }
+    }
+  } else if (bloom_in_file) {
+    enable_bloom_ = true;
+    auto num_blocks_property = props->user_collected_properties.find(
+        PlainTablePropertyNames::kNumBloomBlocks);
+
+    uint32_t num_blocks = 0;
+    if (num_blocks_property != props->user_collected_properties.end()) {
+      Slice temp_slice(num_blocks_property->second);
+      if (!GetVarint32(&temp_slice, &num_blocks)) {
+        num_blocks = 0;
+      }
     }
+    // cast away const qualifier, because bloom_ won't be changed
+    bloom_.SetRawData(
+        const_cast<unsigned char*>(
+            reinterpret_cast<const unsigned char*>(bloom_block->data())),
+        static_cast<uint32_t>(bloom_block->size()) * 8, num_blocks);
+  } else {
+    // Index in file but no bloom in file. Disable bloom filter in this case.
+    enable_bloom_ = false;
+    bloom_bits_per_key = 0;
   }
+
   PlainTableIndexBuilder index_builder(&arena_, ioptions_, prefix_extractor_,
                                        index_sparseness, hash_table_ratio,
                                        huge_page_tlb_size);
 
   std::vector<uint32_t> prefix_hashes;
-  Status s = PopulateIndexRecordList(&index_builder, &prefix_hashes);
-  if (!s.ok()) {
-    return s;
+  if (!index_in_file) {
+    s = PopulateIndexRecordList(&index_builder, &prefix_hashes);
+    if (!s.ok()) {
+      return s;
+    }
+  } else {
+    s = index_.InitFromRawData(*index_block);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  if (!index_in_file) {
+    // Calculated bloom filter size and allocate memory for
+    // bloom filter based on the number of prefixes, then fill it.
+    AllocateAndFillBloom(bloom_bits_per_key, index_.GetNumPrefixes(),
+                         huge_page_tlb_size, &prefix_hashes);
   }
-  // Calculated bloom filter size and allocate memory for
-  // bloom filter based on the number of prefixes, then fill it.
-  AllocateAndFillBloom(bloom_bits_per_key, index_.GetNumPrefixes(),
-                       huge_page_tlb_size, &prefix_hashes);
 
   // Fill two table properties.
-  props->user_collected_properties["plain_table_hash_table_size"] =
-      ToString(index_.GetIndexSize() * PlainTableIndex::kOffsetLen);
-  props->user_collected_properties["plain_table_sub_index_size"] =
-      ToString(index_.GetSubIndexSize());
+  if (!index_in_file) {
+    props->user_collected_properties["plain_table_hash_table_size"] =
+        ToString(index_.GetIndexSize() * PlainTableIndex::kOffsetLen);
+    props->user_collected_properties["plain_table_sub_index_size"] =
+        ToString(index_.GetSubIndexSize());
+  } else {
+    props->user_collected_properties["plain_table_hash_table_size"] =
+        ToString(0);
+    props->user_collected_properties["plain_table_sub_index_size"] =
+        ToString(0);
+  }
 
   return Status::OK();
 }