From 5ce6bf7729b30d8709ac7c33f46688ee4c7bf43c Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Thu, 30 Jan 2025 03:27:58 -0600 Subject: [PATCH] osd/scrub: discard repair_oinfo_oid() repair_oinfo_oid(), called every scrub, has a very specific functionality: fix the object ID specified in the Object Info attribute, if different from the ID of the owning object. This fix was added in 2017, as a response to a unique failure scenario that was observed in Sepia - probably following a filesystem bug. See https://tracker.ceph.com/issues/18409 & https://tracker.ceph.com/issues/20471. The limited functionality of repair_oinfo_oid() - only repairing this one specific issue, and only if the OI_ATTR exists and is decodable - does not justify the overhead of running it every scrub. (cherry picked from commit aa22f19831731185e3c115a2b4e5603e8ef2634f) Signed-off-by: Ronen Friedman --- src/osd/scrubber/pg_scrubber.cc | 56 ++----------------------------- src/osd/scrubber/pg_scrubber.h | 2 -- src/osd/scrubber/scrub_backend.cc | 13 +++++-- 3 files changed, 13 insertions(+), 58 deletions(-) diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 893e59e74d8..dfb24d3df1d 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -1243,63 +1243,13 @@ int PgScrubber::build_scrub_map_chunk(ScrubMap& map, } // finish - dout(20) << __func__ << " finishing" << dendl; ceph_assert(pos.done()); - repair_oinfo_oid(map); - - dout(20) << __func__ << " done, got " << map.objects.size() << " items" - << dendl; + dout(20) << fmt::format("{}: done. {} objects in scrub-map", __func__, + map.objects.size()) + << dendl; return 0; } -/// \todo consider moving repair_oinfo_oid() back to the backend -void PgScrubber::repair_oinfo_oid(ScrubMap& smap) -{ - for (auto i = smap.objects.rbegin(); i != smap.objects.rend(); ++i) { - - const hobject_t& hoid = i->first; - ScrubMap::object& o = i->second; - - if (o.attrs.find(OI_ATTR) == o.attrs.end()) { - continue; - } - bufferlist bl; - bl.push_back(o.attrs[OI_ATTR]); - object_info_t oi; - try { - oi.decode(bl); - } catch (...) { - continue; - } - - if (oi.soid != hoid) { - ObjectStore::Transaction t; - OSDriver::OSTransaction _t(m_pg->osdriver.get_transaction(&t)); - - m_osds->clog->error() - << "osd." << m_pg_whoami << " found object info error on pg " << m_pg_id - << " oid " << hoid << " oid in object info: " << oi.soid - << "...repaired"; - // Fix object info - oi.soid = hoid; - bl.clear(); - encode(oi, - bl, - m_pg->get_osdmap()->get_features(CEPH_ENTITY_TYPE_OSD, nullptr)); - - bufferptr bp(bl.c_str(), bl.length()); - o.attrs[OI_ATTR] = bp; - - t.setattr(m_pg->coll, ghobject_t(hoid), OI_ATTR, bl); - int r = m_pg->osd->store->queue_transaction(m_pg->ch, std::move(t)); - if (r != 0) { - derr << __func__ << ": queue_transaction got " << cpp_strerror(r) - << dendl; - } - } - } -} - void PgScrubber::run_callbacks() { diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index f172f5fe9bd..e1641ec3109 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -774,8 +774,6 @@ class PgScrubber : public ScrubPgIF, epoch_t m_interval_start{0}; ///< interval's 'from' of when scrubbing was ///< first scheduled - void repair_oinfo_oid(ScrubMap& smap); - /* * the exact epoch when the scrubbing actually started (started here - cleared * checks for no-scrub conf). Incoming events are verified against this, with diff --git a/src/osd/scrubber/scrub_backend.cc b/src/osd/scrubber/scrub_backend.cc index e25c5b99da0..c351723ee6b 100644 --- a/src/osd/scrubber/scrub_backend.cc +++ b/src/osd/scrubber/scrub_backend.cc @@ -721,10 +721,17 @@ shard_as_auth_t ScrubBackend::possible_auth_shard(const hobject_t& obj, return shard_as_auth_t{errstream.str()}; } } - } - // This is automatically corrected in repair_oinfo_oid() - ceph_assert(oi.soid == obj); + if (!dup_error_cond(err, + false, + (oi.soid != obj), + shard_info, + &shard_info_wrapper::set_info_corrupted, + "candidate info oid mismatch"sv, + errstream)) { + return shard_as_auth_t{errstream.str()}; + } + } if (test_error_cond(smap_obj.size != logical_to_ondisk_size(oi.size), shard_info, -- 2.39.5