From 397992439ff7ae76475f4a3789ad0673d16b8c99 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Mon, 31 Jan 2022 13:25:58 +0000 Subject: [PATCH] osd/scrub: make the scrubber-be the sole owner of the authoritative set simplifying the interfaces used by the scrubber backend. Signed-off-by: Ronen Friedman --- src/osd/scrubber/pg_scrubber.cc | 41 +++++++++++++++++++++++++------ src/osd/scrubber/scrub_backend.cc | 29 +++++++++++----------- src/osd/scrubber/scrub_backend.h | 2 ++ 3 files changed, 50 insertions(+), 22 deletions(-) diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 59db35e0910..f1f0e295be0 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -1626,7 +1626,8 @@ void PgScrubber::scrub_finish() // if the repair request comes from auto-repair and large number of errors, // we would like to cancel auto-repair if (m_is_repair && m_flags.auto_repair && - m_authoritative.size() > m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) { + m_be->authoritative_peers_count() > + static_cast(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) { dout(10) << __func__ << " undoing the repair" << dendl; state_clear(PG_STATE_REPAIR); // not expected to be set, anyway @@ -1636,10 +1637,12 @@ void PgScrubber::scrub_finish() m_be->update_repair_status(m_is_repair); - // if a regular scrub had errors within the limit, do a deep scrub to auto repair + // if a regular scrub had errors within the limit, do a deep scrub to auto + // repair bool do_auto_scrub = false; - if (m_flags.deep_scrub_on_error && !m_authoritative.empty() && - m_authoritative.size() <= m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) { + if (m_flags.deep_scrub_on_error && m_be->authoritative_peers_count() && + m_be->authoritative_peers_count() <= + static_cast(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) { ceph_assert(!m_is_deep); do_auto_scrub = true; dout(15) << __func__ << " Try to auto repair after scrub errors" << dendl; @@ -1650,9 +1653,34 @@ void PgScrubber::scrub_finish() // type-specific finish (can tally more errors) _scrub_finish(); + /// \todo fix the relevant scrub test so that we would not need the extra log + /// line here (even if the following 'if' is false) + + if (m_be->authoritative_peers_count()) { + + auto err_msg = fmt::format("{} {} {} missing, {} inconsistent objects", + m_pg->info.pgid, + m_mode_desc, + m_be->m_missing.size(), + m_be->m_inconsistent.size()); + + dout(2) << err_msg << dendl; + m_osds->clog->error() << fmt::to_string(err_msg); + } + // note that the PG_STATE_REPAIR might have changed above - m_fixed_count += m_be->scrub_process_inconsistent(); - bool has_error = !m_authoritative.empty() && m_is_repair; + if (m_be->authoritative_peers_count() && m_is_repair) { + + state_clear(PG_STATE_CLEAN); + // we know we have a problem, so it's OK to set the user-visible flag + // even if we only reached here via auto-repair + state_set(PG_STATE_REPAIR); + update_op_mode_text(); + m_be->update_repair_status(true); + m_fixed_count += m_be->scrub_process_inconsistent(); + } + + bool has_error = (m_be->authoritative_peers_count() > 0) && m_is_repair; { stringstream oss; @@ -2103,7 +2131,6 @@ void PgScrubber::reset_internal_state() run_callbacks(); - m_authoritative.clear(); num_digest_updates_pending = 0; m_primary_scrubmap_pos.reset(); replica_scrubmap = ScrubMap{}; diff --git a/src/osd/scrubber/scrub_backend.cc b/src/osd/scrubber/scrub_backend.cc index d19c39782ca..d8d00cbed0a 100644 --- a/src/osd/scrubber/scrub_backend.cc +++ b/src/osd/scrubber/scrub_backend.cc @@ -268,8 +268,8 @@ void ScrubBackend::omap_checks() /* * update_authoritative() updates: - # - * - m_scrubber.m_authoritative: adds obj-> list of pairs < scrub-map, shard> + * + * - m_auth_peers: adds obj-> list of pairs < scrub-map, shard> * * - m_cleaned_meta_map: replaces [obj] entry with: * the relevant object in the scrub-map of the "selected" (back-most) peer @@ -284,20 +284,18 @@ void ScrubBackend::update_authoritative() compare_smaps(); // note: might cluster-log errors - /// \todo try replacing with algorithm-based code - - // update the scrubber object's m_authoritative with the list of good + // update the session-wide m_auth_peers with the list of good // peers for each object (i.e. the ones that are in this_chunks's auth list) for (auto& [obj, peers] : this_chunk->authoritative) { - list> good_peers; + auth_peers_t good_peers; for (auto& peer : peers) { good_peers.emplace_back(this_chunk->received_maps[peer].objects[obj], peer); } - m_scrubber.m_authoritative.emplace(obj, good_peers); + m_auth_peers.emplace(obj, std::move(good_peers)); } for (const auto& [obj, peers] : this_chunk->authoritative) { @@ -360,13 +358,11 @@ int ScrubBackend::scrub_process_inconsistent() __func__, m_mode_desc, m_repair, - m_scrubber.m_authoritative.size()) + m_auth_peers.size()) << dendl; + ceph_assert(!m_auth_peers.empty()); // authoritative only store objects which are missing or inconsistent. - if (m_scrubber.m_authoritative.empty()) { - return 0; - } // some tests expect an error message that does not contain the __func__ and // PG: @@ -847,7 +843,8 @@ std::optional ScrubBackend::compare_obj_in_maps(const hobject_t& ho ++m_scrubber.m_shallow_errors; } - m_scrubber.m_store->add_object_error(ho.pool, object_error); + //m_scrubber.m_store->add_object_error(ho.pool, object_error); + this_chunk->m_inconsistent_objs.push_back(std::move(object_error)); return fmt::format("{} soid {} : failed to pick suitable object info\n", m_scrubber.m_pg_id.pgid, ho); @@ -1586,7 +1583,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) } ++m_scrubber.m_shallow_errors; soid_error.set_headless(); - m_scrubber.m_store->add_snap_error(pool.id, soid_error); + this_chunk->m_inconsistent_objs.push_back(std::move(soid_error)); ++soid_error_count; if (head && soid.get_head() == head->get_head()) head_error.set_clone(soid.snap); @@ -1604,8 +1601,10 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) } // Save previous head error information - if (head && (head_error.errors || soid_error_count)) - m_scrubber.m_store->add_snap_error(pool.id, head_error); + if (head && (head_error.errors || soid_error_count)) { + this_chunk->m_inconsistent_objs.push_back(std::move(head_error)); + } + // Set this as a new head object head = soid; missing = 0; diff --git a/src/osd/scrubber/scrub_backend.h b/src/osd/scrubber/scrub_backend.h index c67a1a43acc..6cdcb1e02f5 100644 --- a/src/osd/scrubber/scrub_backend.h +++ b/src/osd/scrubber/scrub_backend.h @@ -352,6 +352,8 @@ class ScrubBackend { const omap_stat_t& this_scrub_omapstats() const { return m_omap_stats; } + int authoritative_peers_count() const { return m_auth_peers.size(); }; + std::ostream& logger_prefix(std::ostream* _dout, const ScrubBackend* t); private: -- 2.39.5