From 8b45b0d7d77dc3835b19d6c723f9cedf76af7357 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 25 Mar 2016 11:03:28 -0400 Subject: [PATCH] os/bluestore: release wal_cleaning extents in order We need to order the freelist updates so that they match the commit order of the actual transactions. Otherwise we might, say, set a key here, delete it in the _txc_update_fm, but commit in the wrong order and end up with the key surviving. Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 66 ++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 943efc3ae267e..1f516490c7964 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -3920,31 +3920,24 @@ void BlueStore::_kv_sync_thread() dout(30) << __func__ << " committing txc " << kv_committing << dendl; dout(30) << __func__ << " wal_cleaning txc " << wal_cleaning << dendl; - // one transaction to force a sync - KeyValueDB::Transaction t = db->get_transaction(); + alloc->commit_start(); - // allocations and deallocations - for (std::deque::iterator it = wal_cleaning.begin(); - it != wal_cleaning.end(); - ++it) { - TransContext *txc = *it; - if (!txc->wal_txn->released.empty()) { - dout(20) << __func__ << " txc " << txc - << " (post-wal) released " << txc->wal_txn->released - << dendl; - for (interval_set::iterator p = - txc->wal_txn->released.begin(); - p != txc->wal_txn->released.end(); - ++p) { - dout(20) << __func__ << " release " << p.get_start() - << "~" << p.get_len() << dendl; - fm->release(p.get_start(), p.get_len(), t); - if (!g_conf->bluestore_debug_no_reuse_blocks) - alloc->release(p.get_start(), p.get_len()); - } + // flush/barrier on block device + bdev->flush(); + + if (!g_conf->bluestore_sync_transaction && + !g_conf->bluestore_sync_submit_transaction) { + for (std::deque::iterator it = kv_committing.begin(); + it != kv_committing.end(); + ++it) { + _txc_update_fm((*it)); + db->submit_transaction((*it)->t); } } + // one final transaction to force a sync + KeyValueDB::Transaction t = db->get_transaction(); + vector bluefs_gift_extents; if (bluefs) { int r = _balance_bluefs_freespace(&bluefs_gift_extents, t); @@ -3962,17 +3955,25 @@ void BlueStore::_kv_sync_thread() } } - alloc->commit_start(); - - // flush/barrier on block device - bdev->flush(); - - if (!g_conf->bluestore_sync_transaction && !g_conf->bluestore_sync_submit_transaction) { - for (std::deque::iterator it = kv_committing.begin(); - it != kv_committing.end(); - ++it) { - _txc_update_fm((*it)); - db->submit_transaction((*it)->t); + // allocations and deallocations + for (std::deque::iterator it = wal_cleaning.begin(); + it != wal_cleaning.end(); + ++it) { + TransContext *txc = *it; + if (!txc->wal_txn->released.empty()) { + dout(20) << __func__ << " txc " << txc + << " (post-wal) released " << txc->wal_txn->released + << dendl; + for (interval_set::iterator p = + txc->wal_txn->released.begin(); + p != txc->wal_txn->released.end(); + ++p) { + dout(20) << __func__ << " release " << p.get_start() + << "~" << p.get_len() << dendl; + fm->release(p.get_start(), p.get_len(), t); + if (!g_conf->bluestore_debug_no_reuse_blocks) + alloc->release(p.get_start(), p.get_len()); + } } } @@ -3997,6 +3998,7 @@ void BlueStore::_kv_sync_thread() t->rmkey(PREFIX_WAL, key); } db->submit_transaction_sync(t); + utime_t finish = ceph_clock_now(NULL); utime_t dur = finish - start; dout(20) << __func__ << " committed " << kv_committing.size() -- 2.39.5