From f087679af5af13de4e282f3f01eae6dab6684ad4 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Thu, 18 Apr 2024 15:15:25 -0700 Subject: [PATCH] osd/PGBackend::be_scan_list: only call stat, getattrs once per object We call back into be_scan_list repeatedly for the same object in deep scrub if the object is too large/has too many omap entries to be done in a single call. This causes us to wastefully call ObjectStore::getattrs and stat multiple times. This has been a long standing, but mostly harmless bug (aside from the extra metadata traffic). However, 1a4d3f018, between reef and squid, switched ScrubMap::object::attrs to a map from a map. This should have been harmless, except that the ObjectStore::getattrs overload for that type uses aset[i->first].append(i->second); rather than aset.emplace(i.first.c_str(), i.second); to populate the map. Combined with this bug, that means that if we need 7 calls into be_scan_list to deep scrub an object, we'll end up with attributes concatenated 7 times each. This didn't cause visible problems with squid/main testing since all of the replicas would end up doing the same thing and they'd still decode ok, but it did become visible during reef->squid upgrade testing as the older osds sent maps with the correct contents. The next commit will fix ObjectStore::getattrs to clear the map. Fixes: https://tracker.ceph.com/issues/65185 Signed-off-by: Samuel Just (cherry picked from commit 5671e8555eddc622a6edb926e67dadce79e6add5) --- src/osd/PGBackend.cc | 75 +++++++++++++++++++++++++++----------------- src/osd/osd_types.h | 3 ++ 2 files changed, 50 insertions(+), 28 deletions(-) diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index 2d354dee282..8f0c9486ca0 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -615,42 +615,61 @@ int PGBackend::be_scan_list( ceph_assert(pos.pos < pos.ls.size()); hobject_t& poid = pos.ls[pos.pos]; - struct stat st; - int r = store->stat( - ch, - ghobject_t( - poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), - &st, - true); - if (r == 0) { - ScrubMap::object &o = map.objects[poid]; - o.size = st.st_size; - ceph_assert(!o.negative); - store->getattrs( + int r = 0; + ScrubMap::object &o = map.objects[poid]; + if (!pos.metadata_done) { + struct stat st; + r = store->stat( ch, ghobject_t( poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), - o.attrs); + &st, + true); + + if (r == 0) { + o.size = st.st_size; + ceph_assert(!o.negative); + r = store->getattrs( + ch, + ghobject_t( + poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), + o.attrs); + } - if (pos.deep) { - r = be_deep_scrub(poid, map, pos, o); + if (r == -ENOENT) { + dout(25) << __func__ << " " << poid << " got " << r + << ", removing from map" << dendl; + map.objects.erase(poid); + } else if (r == -EIO) { + dout(25) << __func__ << " " << poid << " got " << r + << ", stat_error" << dendl; + o.stat_error = true; + } else if (r != 0) { + derr << __func__ << " got: " << cpp_strerror(r) << dendl; + ceph_abort(); + } + + if (r != 0) { + dout(25) << __func__ << " " << poid << " got " << r + << ", skipping" << dendl; + pos.next_object(); + return 0; } + dout(25) << __func__ << " " << poid << dendl; - } else if (r == -ENOENT) { - dout(25) << __func__ << " " << poid << " got " << r - << ", skipping" << dendl; - } else if (r == -EIO) { - dout(25) << __func__ << " " << poid << " got " << r - << ", stat_error" << dendl; - ScrubMap::object &o = map.objects[poid]; - o.stat_error = true; - } else { - derr << __func__ << " got: " << cpp_strerror(r) << dendl; - ceph_abort(); + pos.metadata_done = true; } - if (r == -EINPROGRESS) { - return -EINPROGRESS; + + if (pos.deep) { + r = be_deep_scrub(poid, map, pos, o); + if (r == -EINPROGRESS) { + return -EINPROGRESS; + } else if (r != 0) { + derr << __func__ << " be_deep_scrub got: " << cpp_strerror(r) << dendl; + ceph_abort(); + } } + pos.next_object(); return 0; } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index b44ce7e1968..b283f4b9b2a 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -6298,6 +6298,7 @@ WRITE_CLASS_ENCODER(ScrubMap) struct ScrubMapBuilder { bool deep = false; std::vector ls; + bool metadata_done = false; size_t pos = 0; int64_t data_pos = 0; std::string omap_pos; @@ -6322,6 +6323,7 @@ struct ScrubMapBuilder { void next_object() { ++pos; + metadata_done = false; data_pos = 0; omap_pos.clear(); omap_keys = 0; @@ -6333,6 +6335,7 @@ struct ScrubMapBuilder { if (pos.pos < pos.ls.size()) { out << " " << pos.ls[pos.pos]; } + out << " metadata_done " << pos.metadata_done; if (pos.data_pos < 0) { out << " byte " << pos.data_pos; } -- 2.39.5