From 18d120dc2c15c0cf8d3b27ddba3c7f2057856555 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Thu, 12 Nov 2020 20:04:45 +0300 Subject: [PATCH] os/bluestore: detect and fix "zombie" spanning blobs using fsck. Signed-off-by: Igor Fedotov --- src/os/bluestore/BlueStore.cc | 67 +++++++++++++++++++++++++++++- src/os/bluestore/BlueStore.h | 3 ++ src/test/objectstore/store_test.cc | 35 ++++++++++++++-- 3 files changed, 100 insertions(+), 5 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index ddf98cedfe0f0..7a2fe9635d8c3 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -2348,7 +2348,7 @@ BlueStore::OldExtent* BlueStore::OldExtent::create(CollectionRef c, BlobRef& b) { OldExtent* oe = new OldExtent(lo, o, l, b); b->put_ref(c.get(), o, l, &(oe->r)); - oe->blob_empty = b->get_referenced_bytes() == 0; + oe->blob_empty = !b->is_referenced(); return oe; } @@ -7452,6 +7452,34 @@ BlueStore::OnodeRef BlueStore::fsck_check_objects_shallow( } } // for (auto& i : ref_map) + { + auto &sbm = o->extent_map.spanning_blob_map; + size_t broken = 0; + BlobRef first_broken; + for (auto it = sbm.begin(); it != sbm.end();) { + auto it1 = it++; + if (ref_map.count(it1->second) == 0) { + if (!broken) { + first_broken = it1->second; + ++errors; + } + broken++; + if (repairer) { + sbm.erase(it1); + } + } + } + if (broken) { + derr << "fsck error: " << oid << " - " << broken + << " zombie spanning blob(s) found, the first one: " + << *first_broken << dendl; + if(repairer) { + auto txn = repairer->fix_spanning_blobs(db); + _record_onode(o, txn); + } + } + } + if (o->onode.has_omap()) { _fsck_check_object_omap(depth, o, ctx); } @@ -8975,6 +9003,30 @@ void BlueStore::inject_misreference(coll_t cid1, ghobject_t oid1, db->submit_transaction_sync(txn); } +void BlueStore::inject_zombie_spanning_blob(coll_t cid, ghobject_t oid, + int16_t blob_id) +{ + OnodeRef o; + CollectionRef c = _get_collection(cid); + ceph_assert(c); + { + std::unique_lock l{ c->lock }; // just to avoid internal asserts + o = c->get_onode(oid, false); + ceph_assert(o); + o->extent_map.fault_range(db, 0, OBJECT_MAX_SIZE); + } + + BlobRef b = c->new_blob(); + b->id = blob_id; + o->extent_map.spanning_blob_map[blob_id] = b; + + KeyValueDB::Transaction txn; + txn = db->get_transaction(); + + _record_onode(o, txn); + db->submit_transaction_sync(txn); +} + void BlueStore::collect_metadata(map *pm) { dout(10) << __func__ << dendl; @@ -16171,6 +16223,15 @@ bool BlueStoreRepairer::fix_false_free(KeyValueDB *db, return true; } +KeyValueDB::Transaction BlueStoreRepairer::fix_spanning_blobs(KeyValueDB* db) +{ + if (!fix_onode_txn) { + fix_onode_txn = db->get_transaction(); + } + ++to_repair_cnt; + return fix_onode_txn; +} + bool BlueStoreRepairer::preprocess_misreference(KeyValueDB *db) { if (misreferenced_extents.size()) { @@ -16206,6 +16267,10 @@ unsigned BlueStoreRepairer::apply(KeyValueDB* db) db->submit_transaction_sync(fix_misreferences_txn); fix_misreferences_txn = nullptr; } + if (fix_onode_txn) { + db->submit_transaction_sync(fix_onode_txn); + fix_onode_txn = nullptr; + } if (fix_shared_blob_txn) { db->submit_transaction_sync(fix_shared_blob_txn); fix_shared_blob_txn = nullptr; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 14c06a5c26ebf..a251b82f7bf2c 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2998,6 +2998,7 @@ public: void inject_misreference(coll_t cid1, ghobject_t oid1, coll_t cid2, ghobject_t oid2, uint64_t offset); + void inject_zombie_spanning_blob(coll_t cid, ghobject_t oid, int16_t blob_id); // resets global per_pool_omap in DB void inject_legacy_omap(); // resets per_pool_omap | pgmeta_omap for onode @@ -3631,6 +3632,7 @@ public: bool fix_false_free(KeyValueDB *db, FreelistManager* fm, uint64_t offset, uint64_t len); + KeyValueDB::Transaction fix_spanning_blobs(KeyValueDB* db); void init(uint64_t total_space, uint64_t lres_tracking_unit_size); @@ -3669,6 +3671,7 @@ private: KeyValueDB::Transaction fix_shared_blob_txn; KeyValueDB::Transaction fix_misreferences_txn; + KeyValueDB::Transaction fix_onode_txn; StoreSpaceTracker space_usage_tracker; diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index 596b0d1674bd3..d643d90c6ae5b 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -8305,13 +8305,41 @@ TEST_P(StoreTestSpecificAUSize, BluestoreRepairTest) { ASSERT_EQ(bstore->fsck(false), 0); } + cerr << "Zombie spanning blob" << std::endl; + { + bstore->mount(); + ghobject_t hoid4 = make_object("Object 4", pool); + auto ch = store->open_collection(cid); + { + bufferlist bl; + string s(0x1000, 'a'); + bl.append(s); + ObjectStore::Transaction t; + for(size_t i = 0; i < 0x10; i++) { + t.write(cid, hoid4, i * bl.length(), bl.length(), bl); + } + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + sleep(5); + { + bstore->inject_zombie_spanning_blob(cid, hoid4, 12345); + bstore->inject_zombie_spanning_blob(cid, hoid4, 23456); + bstore->inject_zombie_spanning_blob(cid, hoid4, 23457); + } + + bstore->umount(); + ASSERT_EQ(bstore->fsck(false), 1); + ASSERT_LE(bstore->repair(false), 0); + ASSERT_EQ(bstore->fsck(false), 0); + } + cerr << "Completing" << std::endl; bstore->mount(); } -TEST_P(StoreTest, BluestoreRepairGlobalStats) -{ +TEST_P(StoreTest, BluestoreRepairGlobalStats) { if (string(GetParam()) != "bluestore") return; const size_t offs_base = 65536 / 2; @@ -8373,8 +8401,7 @@ TEST_P(StoreTest, BluestoreRepairGlobalStats) bstore->mount(); } -TEST_P(StoreTest, BluestoreRepairGlobalStatsFixOnMount) -{ +TEST_P(StoreTest, BluestoreRepairGlobalStatsFixOnMount) { if (string(GetParam()) != "bluestore") return; const size_t offs_base = 65536 / 2; -- 2.39.5