git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: release wal_cleaning extents in order
author Sage Weil <sage@redhat.com>
Fri, 25 Mar 2016 15:03:28 +0000 (11:03 -0400)
committer Sage Weil <sage@redhat.com>
Wed, 30 Mar 2016 15:23:15 +0000 (11:23 -0400)
We need to order the freelist updates so that they match the
commit order of the actual transactions.  Otherwise we might, say,
set a key here, delete it in _txc_update_fm, but commit the two
updates in the wrong order and end up with the key surviving.

Signed-off-by: Sage Weil <sage@redhat.com>
src/os/bluestore/BlueStore.cc

index 943efc3ae267e1c921288302afd7e97afc49a81c..1f516490c7964fd0775e4c7a14486ab94f1793ee 100644 (file)
@@ -3920,31 +3920,24 @@ void BlueStore::_kv_sync_thread()
       dout(30) << __func__ << " committing txc " << kv_committing << dendl;
       dout(30) << __func__ << " wal_cleaning txc " << wal_cleaning << dendl;
 
-      // one transaction to force a sync
-      KeyValueDB::Transaction t = db->get_transaction();
+      alloc->commit_start();
 
-      // allocations and deallocations
-      for (std::deque<TransContext *>::iterator it = wal_cleaning.begin();
-         it != wal_cleaning.end();
-         ++it) {
-       TransContext *txc = *it;
-       if (!txc->wal_txn->released.empty()) {
-         dout(20) << __func__ << " txc " << txc
-           << " (post-wal) released " << txc->wal_txn->released
-           << dendl;
-         for (interval_set<uint64_t>::iterator p =
-             txc->wal_txn->released.begin();
-             p != txc->wal_txn->released.end();
-             ++p) {
-           dout(20) << __func__ << " release " << p.get_start()
-             << "~" << p.get_len() << dendl;
-           fm->release(p.get_start(), p.get_len(), t);
-           if (!g_conf->bluestore_debug_no_reuse_blocks)
-             alloc->release(p.get_start(), p.get_len());
-         }
+      // flush/barrier on block device
+      bdev->flush();
+
+      if (!g_conf->bluestore_sync_transaction &&
+         !g_conf->bluestore_sync_submit_transaction) {
+       for (std::deque<TransContext *>::iterator it = kv_committing.begin();
+            it != kv_committing.end();
+            ++it) {
+         _txc_update_fm((*it));
+         db->submit_transaction((*it)->t);
        }
       }
 
+      // one final transaction to force a sync
+      KeyValueDB::Transaction t = db->get_transaction();
+
       vector<bluestore_extent_t> bluefs_gift_extents;
       if (bluefs) {
        int r = _balance_bluefs_freespace(&bluefs_gift_extents, t);
@@ -3962,17 +3955,25 @@ void BlueStore::_kv_sync_thread()
        }
       }
 
-      alloc->commit_start();
-
-      // flush/barrier on block device
-      bdev->flush();
-
-      if (!g_conf->bluestore_sync_transaction && !g_conf->bluestore_sync_submit_transaction) {
-       for (std::deque<TransContext *>::iterator it = kv_committing.begin();
-            it != kv_committing.end();
-            ++it) {
-         _txc_update_fm((*it));
-         db->submit_transaction((*it)->t);
+      // allocations and deallocations
+      for (std::deque<TransContext *>::iterator it = wal_cleaning.begin();
+         it != wal_cleaning.end();
+         ++it) {
+       TransContext *txc = *it;
+       if (!txc->wal_txn->released.empty()) {
+         dout(20) << __func__ << " txc " << txc
+           << " (post-wal) released " << txc->wal_txn->released
+           << dendl;
+         for (interval_set<uint64_t>::iterator p =
+             txc->wal_txn->released.begin();
+             p != txc->wal_txn->released.end();
+             ++p) {
+           dout(20) << __func__ << " release " << p.get_start()
+             << "~" << p.get_len() << dendl;
+           fm->release(p.get_start(), p.get_len(), t);
+           if (!g_conf->bluestore_debug_no_reuse_blocks)
+             alloc->release(p.get_start(), p.get_len());
+         }
        }
       }
 
@@ -3997,6 +3998,7 @@ void BlueStore::_kv_sync_thread()
        t->rmkey(PREFIX_WAL, key);
       }
       db->submit_transaction_sync(t);
+
       utime_t finish = ceph_clock_now(NULL);
       utime_t dur = finish - start;
       dout(20) << __func__ << " committed " << kv_committing.size()