]> git-server-git.apps.pok.os.sepia.ceph.com Git - rocksdb.git/commitdiff
Fix a race condition in transaction stress test (#10157)
authorYanqin Jin <yanqin@fb.com>
Tue, 14 Jun 2022 01:54:38 +0000 (18:54 -0700)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Tue, 14 Jun 2022 01:54:38 +0000 (18:54 -0700)
Summary:
Before this PR, there can be a race condition between the thread calling
`StressTest::Open()` and a background compaction thread calling
`MultiOpsTxnsStressTest::VerifyPkSkFast()`.

```
Time   thread1                             bg_compact_thr
 |     TransactionDB::Open(..., &txn_db_)
 |     db_ is still nullptr
 |                                         db_->GetSnapshot()  // segfault
 |     db_ = txn_db_
 V
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10157

Test Plan: CI

Reviewed By: akankshamahajan15

Differential Revision: D37121653

Pulled By: riversand963

fbshipit-source-id: 6a53117f958e9ee86f77297fdeb843e5160a9331

db_stress_tool/db_stress_test_base.cc
db_stress_tool/db_stress_test_base.h
db_stress_tool/multi_ops_txns_stress.cc

index 9e7fa4e2b818ed90d02bf60cee3a7d2292fec2a9..dc7c71c340453c77b18fe3c1ac1a114c4e3d7508 100644 (file)
@@ -66,6 +66,7 @@ StressTest::StressTest()
 #ifndef ROCKSDB_LITE
       txn_db_(nullptr),
 #endif
+      db_aptr_(nullptr),
       clock_(db_stress_env->GetSystemClock().get()),
       new_column_family_name_(1),
       num_times_reopened_(0),
@@ -2541,7 +2542,13 @@ void StressTest::Open(SharedState* shared) {
         fflush(stderr);
       }
       assert(s.ok());
-      db_ = txn_db_;
+
+      // Do not swap the order of the following.
+      {
+        db_ = txn_db_;
+        db_aptr_.store(txn_db_, std::memory_order_release);
+      }
+
       // after a crash, rollback to commit recovered transactions
       std::vector<Transaction*> trans;
       txn_db_->GetAllPreparedTransactions(&trans);
index fcb89c4c2cd7fedaf6ff7a323580edc89d8b3681..d4b2c5e2180777c477441581b209da99e201c629 100644 (file)
@@ -236,6 +236,10 @@ class StressTest {
 #ifndef ROCKSDB_LITE
   TransactionDB* txn_db_;
 #endif
+
+  // Currently only used in MultiOpsTxnsStressTest
+  std::atomic<DB*> db_aptr_;
+
   Options options_;
   SystemClock* clock_;
   std::vector<ColumnFamilyHandle*> column_families_;
index 07a1783f29396a170eda802504d315a8b0a57a55..31c88e6d568f7854f9740b51f28ee5b25269982b 100644 (file)
@@ -1248,7 +1248,19 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const {
   }
 }
 
+// VerifyPkSkFast() can be called by MultiOpsTxnsStressListener's callbacks
+// which can be called before TransactionDB::Open() returns to caller.
+// Therefore, at that time, db_ and txn_db_  may still be nullptr.
+// Caller has to make sure that the race condition does not happen.
 void MultiOpsTxnsStressTest::VerifyPkSkFast(int job_id) {
+  DB* const db = db_aptr_.load(std::memory_order_acquire);
+  if (db == nullptr) {
+    return;
+  }
+
+  assert(db_ == db);
+  assert(db_ != nullptr);
+
   const Snapshot* const snapshot = db_->GetSnapshot();
   assert(snapshot);
   ManagedSnapshot snapshot_guard(db_, snapshot);