]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Allow range deletions in `*TransactionDB` only when safe (#7929)
authorAndrew Kryczka <andrewkr@fb.com>
Fri, 5 Feb 2021 23:55:34 +0000 (15:55 -0800)
committerAndrew Kryczka <andrewkr@fb.com>
Sat, 6 Feb 2021 01:00:34 +0000 (17:00 -0800)
Summary:
Explicitly reject all range deletions on `TransactionDB` or `OptimisticTransactionDB`, except when the user provides sufficient promises that allow us to proceed safely. The necessary promises are described in the API doc for `TransactionDB::DeleteRange()`. There is currently no way to provide enough promises to make it safe in `OptimisticTransactionDB`.

Fixes https://github.com/facebook/rocksdb/issues/7913.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7929

Test Plan: unit tests covering the cases it's permitted/rejected

Reviewed By: ltamasi

Differential Revision: D26240254

Pulled By: ajkr

fbshipit-source-id: 2834a0ce64cc3e4c3799e35b885a5e79c2f4f6d9

HISTORY.md
include/rocksdb/utilities/optimistic_transaction_db.h
include/rocksdb/utilities/transaction_db.h
utilities/transactions/optimistic_transaction_db_impl.h
utilities/transactions/optimistic_transaction_test.cc
utilities/transactions/transaction_test.cc
utilities/transactions/write_prepared_txn_db.cc

index 3233385ab59767dc1a4902bfd1b4bf64f9d227f5..643c745d63965d3a1cff87ecc891a22bfe6e5a19 100644 (file)
@@ -1,4 +1,9 @@
 # Rocksdb Change Log
+## Unreleased
+### Bug Fixes
+* Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details.
+* `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees.
+
 ## 6.15.4 (01/21/2021)
 ### Bug Fixes
 * Fix a race condition between DB startups and shutdowns in managing the periodic background worker threads. One effect of this race condition could be the process being terminated.
index 5356df71f3969d5a31905087b3e291b694a3a502..c070e49a309ba9c88cc6b46cd502bc1907f61bc3 100644 (file)
@@ -51,6 +51,8 @@ struct OptimisticTransactionDBOptions {
   uint32_t occ_lock_buckets = (1 << 20);
 };
 
+// Range deletions (including those in `WriteBatch`es passed to `Write()`) are
+// incompatible with `OptimisticTransactionDB` and will return a non-OK `Status`
 class OptimisticTransactionDB : public StackableDB {
  public:
   // Open an OptimisticTransactionDB similar to DB::Open().
index 2e1a0a171399547344fdf7d3c5ccda4c7a4184f3..71416f55b3d7f5aa161bc9bf797a03198ffa93e7 100644 (file)
@@ -244,6 +244,17 @@ class TransactionDB : public StackableDB {
     // falls back to the un-optimized version of ::Write
     return Write(opts, updates);
   }
+  // Transactional `DeleteRange()` is not yet supported.
+  // However, users who know their deleted range does not conflict with
+  // anything can still use it via the `Write()` API. In all cases, the
+  // `Write()` overload specifying `TransactionDBWriteOptimizations` must be
+  // used and `skip_concurrency_control` must be set. When using either
+  // WRITE_PREPARED or WRITE_UNPREPARED , `skip_duplicate_key_check` must
+  // additionally be set.
+  virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
+                             const Slice&, const Slice&) override {
+    return Status::NotSupported();
+  }
   // Open a TransactionDB similar to DB::Open().
   // Internally call PrepareWrap() and WrapDB()
   // If the return status is not ok, then dbptr is set to nullptr.
index d895d49b89674b719d4ed6d0514d3e1805a1d0a4..a23d9a06d7c4d1ee342b365355c24c8777c0a9e2 100644 (file)
@@ -46,6 +46,22 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
                                 const OptimisticTransactionOptions& txn_options,
                                 Transaction* old_txn) override;
 
+  // Transactional `DeleteRange()` is not yet supported.
+  virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
+                             const Slice&, const Slice&) override {
+    return Status::NotSupported();
+  }
+
+  // Range deletions also must not be snuck into `WriteBatch`es as they are
+  // incompatible with `OptimisticTransactionDB`.
+  virtual Status Write(const WriteOptions& write_opts,
+                       WriteBatch* batch) override {
+    if (batch->HasDeleteRange()) {
+      return Status::NotSupported();
+    }
+    return OptimisticTransactionDB::Write(write_opts, batch);
+  }
+
   size_t GetLockBucketsSize() const { return bucketed_locks_.size(); }
 
   OccValidationPolicy GetValidatePolicy() const { return validate_policy_; }
index 21c8cdac4ee5d3228b5dc1c2c6ecb0699c43aaa0..3256c2405ffec07cdf77a3c375fd0cc814ebc034 100644 (file)
@@ -1098,6 +1098,17 @@ TEST_P(OptimisticTransactionTest, IteratorTest) {
   delete txn;
 }
 
+TEST_P(OptimisticTransactionTest, DeleteRangeSupportTest) {
+  // `OptimisticTransactionDB` does not allow range deletion in any API.
+  ASSERT_TRUE(
+      txn_db
+          ->DeleteRange(WriteOptions(), txn_db->DefaultColumnFamily(), "a", "b")
+          .IsNotSupported());
+  WriteBatch wb;
+  ASSERT_OK(wb.DeleteRange("a", "b"));
+  ASSERT_NOK(txn_db->Write(WriteOptions(), &wb));
+}
+
 TEST_P(OptimisticTransactionTest, SavepointTest) {
   WriteOptions write_options;
   ReadOptions read_options, snapshot_read_options;
index 72068cb85d6d80f893ed213ba046dc2b3e13611d..f52c0be942f434acbcdc7feaf2d6d9c7ba0c015b 100644 (file)
@@ -4835,6 +4835,56 @@ TEST_P(TransactionTest, MergeTest) {
   ASSERT_EQ("a,3", value);
 }
 
+TEST_P(TransactionTest, DeleteRangeSupportTest) {
+  // The `DeleteRange()` API is banned everywhere.
+  ASSERT_TRUE(
+      db->DeleteRange(WriteOptions(), db->DefaultColumnFamily(), "a", "b")
+          .IsNotSupported());
+
+  // But range deletions can be added via the `Write()` API by specifying the
+  // proper flags to promise there are no conflicts according to the DB type
+  // (see `TransactionDB::DeleteRange()` API doc for details).
+  for (bool skip_concurrency_control : {false, true}) {
+    for (bool skip_duplicate_key_check : {false, true}) {
+      ASSERT_OK(db->Put(WriteOptions(), "a", "val"));
+      WriteBatch wb;
+      ASSERT_OK(wb.DeleteRange("a", "b"));
+      TransactionDBWriteOptimizations flags;
+      flags.skip_concurrency_control = skip_concurrency_control;
+      flags.skip_duplicate_key_check = skip_duplicate_key_check;
+      Status s = db->Write(WriteOptions(), flags, &wb);
+      std::string value;
+      switch (txn_db_options.write_policy) {
+        case WRITE_COMMITTED:
+          if (skip_concurrency_control) {
+            ASSERT_OK(s);
+            ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound());
+          } else {
+            ASSERT_NOK(s);
+            ASSERT_OK(db->Get(ReadOptions(), "a", &value));
+          }
+          break;
+        case WRITE_PREPARED:
+          // Intentional fall-through
+        case WRITE_UNPREPARED:
+          if (skip_concurrency_control && skip_duplicate_key_check) {
+            ASSERT_OK(s);
+            ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound());
+          } else {
+            ASSERT_NOK(s);
+            ASSERT_OK(db->Get(ReadOptions(), "a", &value));
+          }
+          break;
+      }
+      // Without any promises from the user, range deletion via other `Write()`
+      // APIs are still banned.
+      ASSERT_OK(db->Put(WriteOptions(), "a", "val"));
+      ASSERT_NOK(db->Write(WriteOptions(), &wb));
+      ASSERT_OK(db->Get(ReadOptions(), "a", &value));
+    }
+  }
+}
+
 TEST_P(TransactionTest, DeferSnapshotTest) {
   WriteOptions write_options;
   ReadOptions read_options;
index a1b67bd1ff797d0bbe61c394cb8dc07e28aa985c..e26b1f0746e9f1dff622f87e530cee1f5adab635 100644 (file)
@@ -157,7 +157,9 @@ Status WritePreparedTxnDB::WriteInternal(const WriteOptions& write_options_orig,
     // TODO(myabandeh): add an option to allow user skipping this cost
     SubBatchCounter counter(*GetCFComparatorMap());
     auto s = batch->Iterate(&counter);
-    assert(s.ok());
+    if (!s.ok()) {
+      return s;
+    }
     batch_cnt = counter.BatchCount();
     WPRecordTick(TXN_DUPLICATE_KEY_OVERHEAD);
     ROCKS_LOG_DETAILS(info_log_, "Duplicate key overhead: %" PRIu64 " batches",