From: Igor Fedotov
Date: Thu, 12 Nov 2020 17:04:45 +0000 (+0300)
Subject: os/bluestore: detect and fix "zombie" spanning blobs using fsck.
X-Git-Tag: v14.2.17~12^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=95dc864840e3f405386acdf312c7505eb3daebc9;p=ceph.git

os/bluestore: detect and fix "zombie" spanning blobs using fsck.

Signed-off-by: Igor Fedotov
(cherry picked from commit 18d120dc2c15c0cf8d3b27ddba3c7f2057856555)

 Conflicts:
	(trivial) src/os/bluestore/BlueStore.cc
	(trivial) src/os/bluestore/BlueStore.h
---

diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 2697b514e0bf..ad4be3d32654 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -2279,7 +2279,7 @@ BlueStore::OldExtent* BlueStore::OldExtent::create(CollectionRef c,
 						   BlobRef& b) {
   OldExtent* oe = new OldExtent(lo, o, l, b);
   b->put_ref(c.get(), o, l, &(oe->r));
-  oe->blob_empty = b->get_referenced_bytes() == 0;
+  oe->blob_empty = !b->is_referenced();
   return oe;
 }
 
@@ -7450,6 +7450,35 @@ BlueStore::OnodeRef BlueStore::fsck_check_objects_shallow(
           *res_statfs);
       }
     } // for (auto& i : ref_map)
+
+    {
+      auto &sbm = o->extent_map.spanning_blob_map; // spanning blobs registered on this onode
+      size_t broken = 0; // number of spanning blobs that have no entry in ref_map
+      BlobRef first_broken; // kept only so the error message can print one example
+      for (auto it = sbm.begin(); it != sbm.end();) {
+        auto it1 = it++; // step forward before the possible erase of it1 below
+        if (ref_map.count(it1->second) == 0) {
+          if (!broken) { // first hit for this object: errors is bumped once per object, not per blob
+            first_broken = it1->second;
+            ++errors;
+          }
+          broken++;
+          if (repairer) { // entries are dropped only when a repairer was supplied
+            sbm.erase(it1);
+          }
+        }
+      }
+      if (broken) {
+        derr << "fsck error: " << oid << " - " << broken
+             << " zombie spanning blob(s) found, the first one: "
+             << *first_broken << dendl;
+        if(repairer) {
+          auto txn = repairer->fix_spanning_blobs(db); // shared txn, submitted later in BlueStoreRepairer::apply()
+          _record_onode(o, txn); // re-record the onode after the erasures above
+        }
+      }
+    }
+
   return o;
 }
 
@@ -8842,6 +8871,30 @@ void BlueStore::inject_misreference(coll_t cid1, ghobject_t oid1,
   db->submit_transaction_sync(txn);
 }
 
+void BlueStore::inject_zombie_spanning_blob(coll_t cid, ghobject_t oid,
+                                            int16_t blob_id)
+{
+  OnodeRef o;
+  CollectionRef c = _get_collection(cid);
+  ceph_assert(c);
+  {
+    RWLock::WLocker l(c->lock); // just to avoid internal asserts
+    o = c->get_onode(oid, false);
+    ceph_assert(o);
+    o->extent_map.fault_range(db, 0, OBJECT_MAX_SIZE);
+  }
+
+  BlobRef b = c->new_blob();
+  b->id = blob_id;
+  o->extent_map.spanning_blob_map[blob_id] = b; // only the map entry is added; no extent is created for it here
+
+  KeyValueDB::Transaction txn;
+  txn = db->get_transaction();
+
+  _record_onode(o, txn);
+  db->submit_transaction_sync(txn); // written synchronously, outside the collection lock scope above
+}
+
 void BlueStore::collect_metadata(map *pm)
 {
   dout(10) << __func__ << dendl;
@@ -14997,6 +15050,14 @@ bool BlueStoreRepairer::fix_bluefs_extents(std::atomic& out_of_sync_fl
   ++to_repair_cnt;
   return true;
 }
+KeyValueDB::Transaction BlueStoreRepairer::fix_spanning_blobs(KeyValueDB* db)
+{
+  if (!fix_onode_txn) { // created on first call, then reused by every later call
+    fix_onode_txn = db->get_transaction();
+  }
+  ++to_repair_cnt; // incremented on every call
+  return fix_onode_txn;
+}
 
 bool BlueStoreRepairer::preprocess_misreference(KeyValueDB *db)
 {
@@ -15029,6 +15090,10 @@ unsigned BlueStoreRepairer::apply(KeyValueDB* db)
     db->submit_transaction_sync(fix_misreferences_txn);
     fix_misreferences_txn = nullptr;
   }
+  if (fix_onode_txn) { // set only if fix_spanning_blobs() was called
+    db->submit_transaction_sync(fix_onode_txn);
+    fix_onode_txn = nullptr;
+  }
   if (fix_shared_blob_txn) {
     db->submit_transaction_sync(fix_shared_blob_txn);
     fix_shared_blob_txn = nullptr;
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index d64d5584c2b6..0ef5e8f27e07 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -2721,6 +2721,8 @@ public:
   void inject_misreference(coll_t cid1, ghobject_t oid1,
 			   coll_t cid2, ghobject_t oid2,
 			   uint64_t offset);
+  void inject_zombie_spanning_blob(coll_t cid, ghobject_t oid, int16_t blob_id);
+
   // resets global per_pool_omap in DB
   void compact() override {
     ceph_assert(db);
@@ -3352,6 +3354,7 @@ public:
     FreelistManager* fm,
     uint64_t offset, uint64_t len);
   bool fix_bluefs_extents(std::atomic& out_of_sync_flag);
+  KeyValueDB::Transaction fix_spanning_blobs(KeyValueDB* db);
 
   void init(uint64_t total_space,
     uint64_t lres_tracking_unit_size);
@@ -3388,6 +3391,7 @@ private:
   KeyValueDB::Transaction fix_shared_blob_txn;
 
   KeyValueDB::Transaction fix_misreferences_txn;
+  KeyValueDB::Transaction fix_onode_txn;
 
   StoreSpaceTracker space_usage_tracker;
 
diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc
index 47fb19ee8295..55a3e4268a22 100644
--- a/src/test/objectstore/store_test.cc
+++ b/src/test/objectstore/store_test.cc
@@ -7486,13 +7486,41 @@ TEST_P(StoreTestSpecificAUSize, BluestoreRepairTest) {
     ASSERT_EQ(bstore->fsck(false), 0);
   }
 
+  cerr << "Zombie spanning blob" << std::endl;
+  {
+    bstore->mount();
+    ghobject_t hoid4 = make_object("Object 4", pool);
+    auto ch = store->open_collection(cid);
+    {
+      bufferlist bl;
+      string s(0x1000, 'a');
+      bl.append(s);
+      ObjectStore::Transaction t;
+      for(size_t i = 0; i < 0x10; i++) {
+	t.write(cid, hoid4, i * bl.length(), bl.length(), bl);
+      }
+      r = queue_transaction(store, ch, std::move(t));
+      ASSERT_EQ(r, 0);
+    }
+    sleep(5); // NOTE(review): presumably lets the queued write commit before injecting - confirm
+    {
+      bstore->inject_zombie_spanning_blob(cid, hoid4, 12345);
+      bstore->inject_zombie_spanning_blob(cid, hoid4, 23456);
+      bstore->inject_zombie_spanning_blob(cid, hoid4, 23457);
+    }
+
+    bstore->umount();
+    ASSERT_EQ(bstore->fsck(false), 1); // three blobs injected into one object, a single error expected
+    ASSERT_LE(bstore->repair(false), 0);
+    ASSERT_EQ(bstore->fsck(false), 0); // clean again after repair
+  }
+
   cerr << "Completing" << std::endl;
   bstore->mount();
 
 }
 
-TEST_P(StoreTest, BluestoreRepairGlobalStats)
-{
+TEST_P(StoreTest, BluestoreRepairGlobalStats) {
   if (string(GetParam()) != "bluestore")
     return;
   const size_t offs_base = 65536 / 2;
@@ -7554,8 +7582,7 @@ TEST_P(StoreTest, BluestoreRepairGlobalStats)
   bstore->mount();
 }
 
-TEST_P(StoreTest, BluestoreRepairGlobalStatsFixOnMount)
-{
+TEST_P(StoreTest, BluestoreRepairGlobalStatsFixOnMount) {
   if (string(GetParam()) != "bluestore")
     return;
   const size_t offs_base = 65536 / 2;