]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Fix txn `MultiGet()` return un-committed data with snapshot (#7963)
authorJay Zhuang <zjay@fb.com>
Thu, 18 Feb 2021 16:47:02 +0000 (08:47 -0800)
committerJay Zhuang <zjay@fb.com>
Thu, 18 Feb 2021 17:30:53 +0000 (09:30 -0800)
Summary:
TransactionDB uses read callback to filter out un-committed data before
a snapshot. But `MultiGet()` API doesn't use that at all, which causes
returning unwanted data.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7963

Test Plan: Added unittest to reproduce

Reviewed By: anand1976

Differential Revision: D26455851

Pulled By: jay-zhuang

fbshipit-source-id: 265276698cf9d8c4cd79e3250ef10d14375bac55

HISTORY.md
db/db_impl/db_impl.cc
utilities/transactions/transaction_test.cc

index 2baf84687584b7982f81f8e1b3b536370555c58b..384dacde32aa7990931ca5798f18e7d7da3befcc 100644 (file)
@@ -3,6 +3,7 @@
 ### Bug Fixes
 * Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details.
 * `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees.
+* Fix `WRITE_PREPARED`, `WRITE_UNPREPARED` TransactionDB `MultiGet()` may return uncommitted data with snapshot.
 
 ## 6.17.1 (01/28/2021)
 ### Behavior Changes
index f4e8e8e3148450ac234d9c8e3ce288235729acd4..200e27a29076100b7d2f1bd9382cd60274fdbeb6 100644 (file)
@@ -1718,7 +1718,9 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
   }
   // If timestamp is used, we use read callback to ensure <key,t,s> is returned
   // only if t <= read_opts.timestamp and s <= snapshot.
-  if (ts_sz > 0 && !get_impl_options.callback) {
+  if (ts_sz > 0) {
+    assert(!get_impl_options
+                .callback);  // timestamp with callback is not supported
     read_cb.Refresh(snapshot);
     get_impl_options.callback = &read_cb;
   }
@@ -2394,8 +2396,9 @@ void DBImpl::MultiGetWithCallback(
   }
 
   GetWithTimestampReadCallback timestamp_read_callback(0);
-  ReadCallback* read_callback = nullptr;
+  ReadCallback* read_callback = callback;
   if (read_options.timestamp && read_options.timestamp->size() > 0) {
+    assert(!read_callback);  // timestamp with callback is not supported
     timestamp_read_callback.Refresh(consistent_seqnum);
     read_callback = &timestamp_read_callback;
   }
index 27f9504e01750e67f318ca91dda31df2c348fd0c..def6d0f6b12a8d2a079034a70b03b50f47208391 100644 (file)
@@ -2925,6 +2925,47 @@ TEST_P(TransactionTest, MultiGetLargeBatchedTest) {
   }
 }
 
+TEST_P(TransactionTest, MultiGetSnapshot) {
+  WriteOptions write_options;
+  TransactionOptions transaction_options;
+  Transaction* txn1 = db->BeginTransaction(write_options, transaction_options);
+
+  Slice key = "foo";
+
+  Status s = txn1->Put(key, "bar");
+  ASSERT_OK(s);
+
+  s = txn1->SetName("test");
+  ASSERT_OK(s);
+
+  s = txn1->Prepare();
+  ASSERT_OK(s);
+
+  // Get snapshot between prepare and commit
+  // Un-committed data should be invisible to other transactions
+  const Snapshot* s1 = db->GetSnapshot();
+
+  s = txn1->Commit();
+  ASSERT_OK(s);
+  delete txn1;
+
+  Transaction* txn2 = db->BeginTransaction(write_options, transaction_options);
+  ReadOptions read_options;
+  read_options.snapshot = s1;
+
+  std::vector<Slice> keys;
+  std::vector<PinnableSlice> values(1);
+  std::vector<Status> statuses(1);
+  keys.push_back(key);
+  auto cfd = db->DefaultColumnFamily();
+  txn2->MultiGet(read_options, cfd, 1, keys.data(), values.data(),
+                 statuses.data());
+  ASSERT_TRUE(statuses[0].IsNotFound());
+  delete txn2;
+
+  db->ReleaseSnapshot(s1);
+}
+
 TEST_P(TransactionTest, ColumnFamiliesTest2) {
   WriteOptions write_options;
   ReadOptions read_options, snapshot_read_options;