From 38c5b04235402a7908bc4713f617d767ca9fdc56 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Thu, 20 May 2021 02:17:21 +0300 Subject: [PATCH] os/bluestore: introduce multithireading sync for bluestore's repairer In quick-fix mode bluestore uses 2 threads by default to perform the repair. Due to lacking synchronization they might corrupt repair transaction batch. Fixes: https://tracker.ceph.com/issues/50017 Signed-off-by: Igor Fedotov --- src/os/bluestore/BlueStore.cc | 33 ++++++++++++++++++++++++--------- src/os/bluestore/BlueStore.h | 32 +++++++++++++++++++++++++++----- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index d03ad90f0172a..4e713b998d991 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -7200,7 +7200,7 @@ int BlueStore::_fsck_check_extents( bs.set(pos); }); if (repairer) { - repairer->get_space_usage_tracker().set_used( e.offset, e.length, cid, oid); + repairer->set_space_used(e.offset, e.length, cid, oid); } if (e.end() > bdev->get_size()) { @@ -7524,8 +7524,11 @@ BlueStore::OnodeRef BlueStore::fsck_check_objects_shallow( << " zombie spanning blob(s) found, the first one: " << *first_broken << dendl; if(repairer) { - auto txn = repairer->fix_spanning_blobs(db); - _record_onode(o, txn); + repairer->fix_spanning_blobs( + db, + [&](KeyValueDB::Transaction txn) { + _record_onode(o, txn); + }); } } } @@ -8290,7 +8293,7 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) if (repair) { - repairer.get_space_usage_tracker().init( + repairer.init_space_usage_tracker( bdev->get_size(), min_alloc_size); } @@ -8449,7 +8452,6 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) if (repair && repairer.preprocess_misreference(db)) { dout(1) << __func__ << " sorting out misreferenced extents" << dendl; - auto& space_tracker = repairer.get_space_usage_tracker(); auto& misref_extents = repairer.get_misreferences(); interval_set to_release; it = db->get_iterator(PREFIX_OBJ, KeyValueDB::ITERATOR_NOCACHE); @@ -8471,7 +8473,7 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) ghobject_t oid; int r = get_key_object(it->key(), &oid); - if (r < 0 || !space_tracker.is_used(oid)) { + if (r < 0 || !repairer.is_used(oid)) { continue; } @@ -8494,7 +8496,7 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) expected_statfs = &expected_pool_statfs[pool_id]; } } - if (!space_tracker.is_used(c->cid)) { + if (!repairer.is_used(c->cid)) { continue; } @@ -16165,6 +16167,7 @@ bool BlueStoreRepairer::remove_key(KeyValueDB *db, const string& prefix, const string& key) { + std::lock_guard l(lock); if (!remove_key_txn) { remove_key_txn = db->get_transaction(); } @@ -16176,6 +16179,8 @@ bool BlueStoreRepairer::remove_key(KeyValueDB *db, void BlueStoreRepairer::fix_per_pool_omap(KeyValueDB *db, int val) { + std::lock_guard l(lock); // possibly redundant + ceph_assert(fix_per_pool_omap_txn == nullptr); fix_per_pool_omap_txn = db->get_transaction(); ++to_repair_cnt; bufferlist bl; @@ -16188,6 +16193,7 @@ bool BlueStoreRepairer::fix_shared_blob( uint64_t sbid, const bufferlist* bl) { + std::lock_guard l(lock); // possibly redundant KeyValueDB::Transaction txn; if (fix_misreferences_txn) { // reuse this txn txn = fix_misreferences_txn; @@ -16213,6 +16219,7 @@ bool BlueStoreRepairer::fix_statfs(KeyValueDB *db, const string& key, const store_statfs_t& new_statfs) { + std::lock_guard l(lock); if (!fix_statfs_txn) { fix_statfs_txn = db->get_transaction(); } @@ -16229,6 +16236,7 @@ bool BlueStoreRepairer::fix_leaked(KeyValueDB *db, FreelistManager* fm, uint64_t offset, uint64_t len) { + std::lock_guard l(lock); if (!fix_fm_leaked_txn) { fix_fm_leaked_txn = db->get_transaction(); } @@ -16240,6 +16248,7 @@ bool BlueStoreRepairer::fix_false_free(KeyValueDB *db, FreelistManager* fm, uint64_t offset, uint64_t len) { + std::lock_guard l(lock); if (!fix_fm_false_free_txn) { fix_fm_false_free_txn = db->get_transaction(); } @@ -16248,17 +16257,22 @@ bool BlueStoreRepairer::fix_false_free(KeyValueDB *db, return true; } -KeyValueDB::Transaction BlueStoreRepairer::fix_spanning_blobs(KeyValueDB* db) +bool BlueStoreRepairer::fix_spanning_blobs( + KeyValueDB* db, + std::function f) { + std::lock_guard l(lock); if (!fix_onode_txn) { fix_onode_txn = db->get_transaction(); } + f(fix_onode_txn); ++to_repair_cnt; - return fix_onode_txn; + return true; } bool BlueStoreRepairer::preprocess_misreference(KeyValueDB *db) { + //NB: not for use in multithreading mode!!! if (misreferenced_extents.size()) { size_t n = space_usage_tracker.filter_out(misreferenced_extents); ceph_assert(n > 0); @@ -16272,6 +16286,7 @@ bool BlueStoreRepairer::preprocess_misreference(KeyValueDB *db) unsigned BlueStoreRepairer::apply(KeyValueDB* db) { + //NB: not for use in multithreading mode!!! if (fix_per_pool_omap_txn) { auto ok = db->submit_transaction_sync(fix_per_pool_omap_txn) == 0; ceph_assert(ok); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 82bdf1a724ca9..5b37126b0d3b7 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -3485,6 +3485,8 @@ static inline void intrusive_ptr_release(BlueStore::OpSequencer *o) { class BlueStoreRepairer { + ceph::mutex lock = ceph::make_mutex("BlueStore::BlueStoreRepairer::lock"); + public: // to simplify future potential migration to mempools using fsck_interval = interval_set; @@ -3626,15 +3628,16 @@ public: bool fix_false_free(KeyValueDB *db, FreelistManager* fm, uint64_t offset, uint64_t len); - KeyValueDB::Transaction fix_spanning_blobs(KeyValueDB* db); - - void init(uint64_t total_space, uint64_t lres_tracking_unit_size); + bool fix_spanning_blobs( + KeyValueDB* db, + std::function f); bool preprocess_misreference(KeyValueDB *db); unsigned apply(KeyValueDB* db); void note_misreference(uint64_t offs, uint64_t len, bool inc_error) { + std::lock_guard l(lock); misreferenced_extents.union_insert(offs, len); if (inc_error) { ++to_repair_cnt; @@ -3645,13 +3648,32 @@ public: ++to_repair_cnt; } - StoreSpaceTracker& get_space_usage_tracker() { - return space_usage_tracker; + void init_space_usage_tracker( + uint64_t total_space, uint64_t lres_tracking_unit_size) + { + //NB: not for use in multithreading mode!!! + space_usage_tracker.init(total_space, lres_tracking_unit_size); + } + void set_space_used(uint64_t offset, uint64_t len, + const coll_t& cid, const ghobject_t& oid) { + std::lock_guard l(lock); + space_usage_tracker.set_used(offset, len, cid, oid); } + inline bool is_used(const coll_t& cid) const { + //NB: not for use in multithreading mode!!! + return space_usage_tracker.is_used(cid); + } + inline bool is_used(const ghobject_t& oid) const { + //NB: not for use in multithreading mode!!! + return space_usage_tracker.is_used(oid); + } + const fsck_interval& get_misreferences() const { + //NB: not for use in multithreading mode!!! return misreferenced_extents; } KeyValueDB::Transaction get_fix_misreferences_txn() { + //NB: not for use in multithreading mode!!! return fix_misreferences_txn; } -- 2.39.5