]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Fix MultiGet with timestamps (#12943)
authoreniac1024 <jycai@bu.edu>
Fri, 23 Aug 2024 20:58:04 +0000 (13:58 -0700)
committerPeter Dillinger <peterd@meta.com>
Sat, 24 Aug 2024 15:38:26 +0000 (08:38 -0700)
Summary:
Issue: MultiGet(PinnableSlice) can't read out all timestamps.
Fixed the impl, and added an UT as well. In the original impl, if MultiGet reads multiple column families, a later column family would clean up timestamps of previous column family.
Fix: https://github.com/facebook/rocksdb/issues/12950#issue-2476996580

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12943

Reviewed By: anand1976

Differential Revision: D61729257

Pulled By: pdillinger

fbshipit-source-id: 55267c26076c8a59acedd27e14714711729a40df

db/db_impl/db_impl.cc
db/db_with_timestamp_basic_test.cc

index 592b38f776e2735c37022a630bdd0a2499994428..0f8c203244c7e98ef07d7ae40ebfc787fd09cbf0 100644 (file)
@@ -3143,10 +3143,11 @@ Status DBImpl::MultiGetImpl(
   StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET);
 
   assert(sorted_keys);
+  assert(start_key + num_keys <= sorted_keys->size());
   // Clear the timestamps for returning results so that we can distinguish
   // between tombstone or key that has never been written
-  for (auto* kctx : *sorted_keys) {
-    assert(kctx);
+  for (size_t i = start_key; i < start_key + num_keys; ++i) {
+    KeyContext* kctx = (*sorted_keys)[i];
     if (kctx->timestamp) {
       kctx->timestamp->clear();
     }
index 2d5b08832ab5fa08d9d4b79b2466ce72175f5a59..acb1c05c1d2673d25a295ca7b77e2abc8c4a81b9 100644 (file)
@@ -172,6 +172,70 @@ TEST_F(DBBasicTestWithTimestamp, MixedCfs) {
   Close();
 }
 
+TEST_F(DBBasicTestWithTimestamp, MultiGetMultipleCfs) {
+  const size_t kTimestampSize = Timestamp(0, 0).size();
+  TestComparator test_cmp(kTimestampSize);
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+  options.avoid_flush_during_shutdown = true;
+  options.comparator = &test_cmp;
+  DestroyAndReopen(options);
+
+  Options options1 = CurrentOptions();
+  options1.env = env_;
+  options1.comparator = &test_cmp;
+  ColumnFamilyHandle* handle = nullptr;
+  Status s = db_->CreateColumnFamily(options1, "data", &handle);
+  ASSERT_OK(s);
+
+  std::string ts = Timestamp(1, 0);
+  WriteBatch wb(0, 0, 0, kTimestampSize);
+  ASSERT_OK(wb.Put("a", "value"));
+  ASSERT_OK(wb.Put(handle, "a", "value"));
+  const auto ts_sz_func = [kTimestampSize](uint32_t /*cf_id*/) {
+    return kTimestampSize;
+  };
+  ASSERT_OK(wb.UpdateTimestamps(ts, ts_sz_func));
+  ASSERT_OK(db_->Write(WriteOptions(), &wb));
+
+  int num_keys = 2;
+  std::vector<Slice> keys;
+  std::vector<std::string> expected_values;
+  for (int i = 0; i < num_keys; i++) {
+    keys.push_back("a");
+    expected_values.push_back("value");
+  }
+  std::vector<ColumnFamilyHandle*> handles;
+  handles.push_back(db_->DefaultColumnFamily());
+  handles.push_back(handle);
+
+  {
+    Slice read_ts_slice(ts);
+    ReadOptions read_opts;
+    read_opts.timestamp = &read_ts_slice;
+
+    std::vector<PinnableSlice> values;
+    values.resize(num_keys);
+    std::vector<Status> statuses;
+    statuses.resize(num_keys);
+    std::vector<std::string> timestamps;
+    timestamps.resize(num_keys);
+
+    db_->MultiGet(read_opts, num_keys, handles.data(), keys.data(),
+                  values.data(), timestamps.data(), statuses.data());
+
+    for (int i = 0; i < num_keys; i++) {
+      ASSERT_OK(statuses[i]);
+      ASSERT_EQ(expected_values[i], values[i].ToString());
+      ASSERT_EQ(ts, timestamps[i]);
+    }
+  }
+
+  delete handle;
+  Close();
+}
+
 TEST_F(DBBasicTestWithTimestamp, CompactRangeWithSpecifiedRange) {
   Options options = CurrentOptions();
   options.env = env_;