From: eniac1024 Date: Fri, 23 Aug 2024 20:58:04 +0000 (-0700) Subject: Fix MultiGet with timestamps (#12943) X-Git-Tag: v9.6.1~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f17287696481a45a7ae3a47eb5f33e715dc4ce8e;p=rocksdb.git Fix MultiGet with timestamps (#12943) Summary: Issue: MultiGet(PinnableSlice) can't read out all timestamps. Fixed the impl, and added an UT as well. In the original impl, if MultiGet reads multiple column families, a later column family would clean up timestamps of previous column family. Fix: https://github.com/facebook/rocksdb/issues/12950#issue-2476996580 Pull Request resolved: https://github.com/facebook/rocksdb/pull/12943 Reviewed By: anand1976 Differential Revision: D61729257 Pulled By: pdillinger fbshipit-source-id: 55267c26076c8a59acedd27e14714711729a40df --- diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 592b38f77..0f8c20324 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -3143,10 +3143,11 @@ Status DBImpl::MultiGetImpl( StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET); assert(sorted_keys); + assert(start_key + num_keys <= sorted_keys->size()); // Clear the timestamps for returning results so that we can distinguish // between tombstone or key that has never been written - for (auto* kctx : *sorted_keys) { - assert(kctx); + for (size_t i = start_key; i < start_key + num_keys; ++i) { + KeyContext* kctx = (*sorted_keys)[i]; if (kctx->timestamp) { kctx->timestamp->clear(); } diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc index 2d5b08832..acb1c05c1 100644 --- a/db/db_with_timestamp_basic_test.cc +++ b/db/db_with_timestamp_basic_test.cc @@ -172,6 +172,70 @@ TEST_F(DBBasicTestWithTimestamp, MixedCfs) { Close(); } +TEST_F(DBBasicTestWithTimestamp, MultiGetMultipleCfs) { + const size_t kTimestampSize = Timestamp(0, 0).size(); + TestComparator test_cmp(kTimestampSize); + Options options = CurrentOptions(); + options.env = env_; + options.create_if_missing = true; + options.avoid_flush_during_shutdown = true; + options.comparator = &test_cmp; + DestroyAndReopen(options); + + Options options1 = CurrentOptions(); + options1.env = env_; + options1.comparator = &test_cmp; + ColumnFamilyHandle* handle = nullptr; + Status s = db_->CreateColumnFamily(options1, "data", &handle); + ASSERT_OK(s); + + std::string ts = Timestamp(1, 0); + WriteBatch wb(0, 0, 0, kTimestampSize); + ASSERT_OK(wb.Put("a", "value")); + ASSERT_OK(wb.Put(handle, "a", "value")); + const auto ts_sz_func = [kTimestampSize](uint32_t /*cf_id*/) { + return kTimestampSize; + }; + ASSERT_OK(wb.UpdateTimestamps(ts, ts_sz_func)); + ASSERT_OK(db_->Write(WriteOptions(), &wb)); + + int num_keys = 2; + std::vector keys; + std::vector expected_values; + for (int i = 0; i < num_keys; i++) { + keys.push_back("a"); + expected_values.push_back("value"); + } + std::vector handles; + handles.push_back(db_->DefaultColumnFamily()); + handles.push_back(handle); + + { + Slice read_ts_slice(ts); + ReadOptions read_opts; + read_opts.timestamp = &read_ts_slice; + + std::vector values; + values.resize(num_keys); + std::vector statuses; + statuses.resize(num_keys); + std::vector timestamps; + timestamps.resize(num_keys); + + db_->MultiGet(read_opts, num_keys, handles.data(), keys.data(), + values.data(), timestamps.data(), statuses.data()); + + for (int i = 0; i < num_keys; i++) { + ASSERT_OK(statuses[i]); + ASSERT_EQ(expected_values[i], values[i].ToString()); + ASSERT_EQ(ts, timestamps[i]); + } + } + + delete handle; + Close(); +} + TEST_F(DBBasicTestWithTimestamp, CompactRangeWithSpecifiedRange) { Options options = CurrentOptions(); options.env = env_;