git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: make nid and blobid allocation less racy
author: Sage Weil <sage@redhat.com>
Thu, 1 Sep 2016 18:31:52 +0000 (14:31 -0400)
committer: Sage Weil <sage@redhat.com>
Wed, 7 Sep 2016 15:26:05 +0000 (11:26 -0400)
We could bump the _max value for a TransContext in its
prepare state, have it wait for a long time on IO, and
let another txc allocate and commit something with
an id higher than the previously committed max.

Fix this first by pushing the max ids into the
TransContext where we can deal with them at commit time,
and then making _kv_sync_thread bump the committed
max in a safe way.

Note that this will need to change if/when we do
these commits in parallel.

Signed-off-by: Sage Weil <sage@redhat.com>
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index 4bd7142a1d84238e9e62579a80253ba757994674..a17980082b94b880d090bf2bcf7ac3607242642d 100644 (file)
@@ -2177,8 +2177,6 @@ BlueStore::BlueStore(CephContext *cct, const string& path)
     fsid_fd(-1),
     mounted(false),
     coll_lock("BlueStore::coll_lock"),
-    nid_last(0),
-    nid_max(0),
     throttle_ops(cct, "bluestore_max_ops", cct->_conf->bluestore_max_ops),
     throttle_bytes(cct, "bluestore_max_bytes", cct->_conf->bluestore_max_bytes),
     throttle_wal_ops(cct, "bluestore_wal_max_ops",
@@ -5661,34 +5659,19 @@ int BlueStore::_open_super_meta()
 
 void BlueStore::_assign_nid(TransContext *txc, OnodeRef o)
 {
-#warning racy because txn commit may be delayed by io
   if (o->onode.nid)
     return;
-  std::lock_guard<std::mutex> l(nid_lock);
-  o->onode.nid = ++nid_last;
-  dout(20) << __func__ << " " << o->onode.nid << dendl;
-  if (nid_last > nid_max) {
-    nid_max += g_conf->bluestore_nid_prealloc;
-    bufferlist bl;
-    ::encode(nid_max, bl);
-    txc->t->set(PREFIX_SUPER, "nid_max", bl);
-    dout(10) << __func__ << " nid_max now " << nid_max << dendl;
-  }
+  uint64_t nid = ++nid_last;
+  dout(20) << __func__ << " " << nid << dendl;
+  o->onode.nid = nid;
+  txc->last_nid = nid;
 }
 
 uint64_t BlueStore::_assign_blobid(TransContext *txc)
 {
-#warning racy because txn commit may be delayed by io
-  std::lock_guard<std::mutex> l(blobid_lock);
   uint64_t bid = ++blobid_last;
   dout(20) << __func__ << " " << bid << dendl;
-  if (blobid_last > blobid_max) {
-    blobid_max += g_conf->bluestore_blobid_prealloc;
-    bufferlist bl;
-    ::encode(blobid_max, bl);
-    txc->t->set(PREFIX_SUPER, "blobid_max", bl);
-    dout(10) << __func__ << " blobid_max now " << blobid_max << dendl;
-  }
+  txc->last_blobid = bid;
   return bid;
 }
 
@@ -6101,13 +6084,38 @@ void BlueStore::_kv_sync_thread()
       // flush/barrier on block device
       bdev->flush();
 
+      uint64_t high_nid = 0, high_blobid = 0;
       if (!g_conf->bluestore_sync_transaction &&
          !g_conf->bluestore_sync_submit_transaction) {
-       for (std::deque<TransContext *>::iterator it = kv_committing.begin();
-            it != kv_committing.end();
-            ++it) {
-         _txc_finalize_kv((*it), (*it)->t);
-         int r = db->submit_transaction((*it)->t);
+       for (auto txc : kv_committing) {
+         _txc_finalize_kv(txc, txc->t);
+         if (txc->last_nid > high_nid) {
+           high_nid = txc->last_nid;
+         }
+         if (txc->last_blobid > high_blobid) {
+           high_blobid = txc->last_blobid;
+         }
+       }
+       if (!kv_committing.empty()) {
+         TransContext *first_txc = kv_committing.front();
+         std::lock_guard<std::mutex> l(id_lock);
+         if (high_nid + g_conf->bluestore_nid_prealloc/2 > nid_max) {
+           nid_max = high_nid + g_conf->bluestore_nid_prealloc;
+           bufferlist bl;
+           ::encode(nid_max, bl);
+           first_txc->t->set(PREFIX_SUPER, "nid_max", bl);
+           dout(10) << __func__ << " nid_max now " << nid_max << dendl;
+         }
+         if (high_blobid + g_conf->bluestore_blobid_prealloc/2 > blobid_max) {
+           blobid_max = high_blobid + g_conf->bluestore_blobid_prealloc;
+           bufferlist bl;
+           ::encode(blobid_max, bl);
+           first_txc->t->set(PREFIX_SUPER, "blobid_max", bl);
+           dout(10) << __func__ << " blobid_max now " << blobid_max << dendl;
+         }
+       }
+       for (auto txc : kv_committing) {
+         int r = db->submit_transaction(txc->t);
          assert(r == 0);
        }
       }
index 264a770b5e3e89c2236a24a43077311d9a4d5bfa..424e5f5a6e2bee41a78ae14531bc155570d10195 100644 (file)
@@ -1036,6 +1036,9 @@ public:
     uint64_t seq = 0;
     utime_t start;
 
+    uint64_t last_nid = 0;     ///< if non-zero, highest new nid we allocated
+    uint64_t last_blobid = 0;  ///< if non-zero, highest new blobid we allocated
+
     struct DeferredCsum {
       BlobRef blob;
       uint64_t b_off;
@@ -1259,12 +1262,10 @@ private:
 
   vector<Cache*> cache_shards;
 
-  std::mutex nid_lock;
-  uint64_t nid_last;
-  uint64_t nid_max;
-
-  std::mutex blobid_lock;
-  uint64_t blobid_last = 0;
+  std::mutex id_lock;
+  std::atomic<uint64_t> nid_last = {0};
+  uint64_t nid_max = 0;
+  std::atomic<uint64_t> blobid_last = {0};
   uint64_t blobid_max = 0;
 
   Throttle throttle_ops, throttle_bytes;          ///< submit to commit