From 2852ea77cb455e2e8d26d46d2b27282d19e9220b Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Sun, 27 Mar 2022 14:56:16 +0000 Subject: [PATCH] test/scrub/osd: scrubber-related interface changes required to facilitate testing Signed-off-by: Ronen Friedman --- src/crimson/osd/ops_executer.cc | 18 ++--- src/crimson/osd/pg.cc | 6 +- src/crimson/osd/pg.h | 6 +- src/include/types_fmt.h | 16 ++++ src/osd/ECBackend.h | 3 +- src/osd/OSD.cc | 2 +- src/osd/PG.cc | 2 +- src/osd/PG.h | 59 ++++++++------- src/osd/PGBackend.h | 3 +- src/osd/PeeringState.h | 19 ++++- src/osd/ReplicatedBackend.h | 5 +- src/osd/scrubber/pg_scrubber.cc | 61 +++++++++++++-- src/osd/scrubber/pg_scrubber.h | 30 +++++--- src/osd/scrubber/scrub_backend.cc | 120 +++++++++--------------------- src/osd/scrubber/scrub_backend.h | 79 ++++++++++++++++---- src/osd/scrubber_common.h | 37 +++++++++ 16 files changed, 300 insertions(+), 166 deletions(-) diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index 075225fabb4..ffe7f0eb602 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -611,7 +611,7 @@ OpsExecuter::do_execute_op(OSDOp& osd_op) }); case CEPH_OSD_OP_OMAPSETVALS: #if 0 - if (!pg.get_pool().info.supports_omap()) { + if (!pg.get_pgpool().info.supports_omap()) { return crimson::ct_error::operation_not_supported::make(); } #endif @@ -620,7 +620,7 @@ OpsExecuter::do_execute_op(OSDOp& osd_op) }, true); case CEPH_OSD_OP_OMAPSETHEADER: #if 0 - if (!pg.get_pool().info.supports_omap()) { + if (!pg.get_pgpool().info.supports_omap()) { return crimson::ct_error::operation_not_supported::make(); } #endif @@ -630,7 +630,7 @@ OpsExecuter::do_execute_op(OSDOp& osd_op) }, true); case CEPH_OSD_OP_OMAPRMKEYRANGE: #if 0 - if (!pg.get_pool().info.supports_omap()) { + if (!pg.get_pgpool().info.supports_omap()) { return crimson::ct_error::operation_not_supported::make(); } #endif @@ -639,7 +639,7 @@ OpsExecuter::do_execute_op(OSDOp& osd_op) }, true); case CEPH_OSD_OP_OMAPRMKEYS: /** TODO: Implement supports_omap() - if (!pg.get_pool().info.supports_omap()) { + if (!pg.get_pgpool().info.supports_omap()) { return crimson::ct_error::operation_not_supported::make(); }*/ return do_write_op([&osd_op] (auto& backend, auto& os, auto& txn) { @@ -714,7 +714,7 @@ std::vector OpsExecuter::prepare_transaction( // Defined here because there is a circular dependency between OpsExecuter and PG uint32_t OpsExecuter::get_pool_stripe_width() const { - return pg->get_pool().info.get_stripe_width(); + return pg->get_pgpool().info.get_stripe_width(); } // Defined here because there is a circular dependency between OpsExecuter and PG @@ -908,7 +908,7 @@ static PG::interruptible_future<> do_pgnls( } const auto pg_start = pg.get_pgid().pgid.get_hobj_start(); const auto pg_end = \ - pg.get_pgid().pgid.get_hobj_end(pg.get_pool().info.get_pg_num()); + pg.get_pgid().pgid.get_hobj_end(pg.get_pgpool().info.get_pg_num()); return do_pgnls_common(pg_start, pg_end, pg.get_backend(), @@ -952,7 +952,7 @@ static PG::interruptible_future<> do_pgnls_filtered( return seastar::do_with(std::move(filter), [&, lower_bound=std::move(lower_bound)](auto&& filter) { const auto pg_start = pg.get_pgid().pgid.get_hobj_start(); - const auto pg_end = pg.get_pgid().pgid.get_hobj_end(pg.get_pool().info.get_pg_num()); + const auto pg_end = pg.get_pgid().pgid.get_hobj_end(pg.get_pgpool().info.get_pg_num()); return do_pgnls_common(pg_start, pg_end, pg.get_backend(), @@ -1037,7 +1037,7 @@ static PG::interruptible_future<> do_pgls( } const auto pg_start = pg.get_pgid().pgid.get_hobj_start(); const auto pg_end = - pg.get_pgid().pgid.get_hobj_end(pg.get_pool().info.get_pg_num()); + pg.get_pgid().pgid.get_hobj_end(pg.get_pgpool().info.get_pg_num()); return do_pgls_common(pg_start, pg_end, pg.get_backend(), @@ -1081,7 +1081,7 @@ static PG::interruptible_future<> do_pgls_filtered( return seastar::do_with(std::move(filter), [&, lower_bound=std::move(lower_bound)](auto&& filter) { const auto pg_start = pg.get_pgid().pgid.get_hobj_start(); - const auto pg_end = pg.get_pgid().pgid.get_hobj_end(pg.get_pool().info.get_pg_num()); + const auto pg_end = pg.get_pgid().pgid.get_hobj_end(pg.get_pgpool().info.get_pg_num()); return do_pgls_common(pg_start, pg_end, pg.get_backend(), diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 70d49b9e24d..58a42c6fbda 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -331,7 +331,7 @@ void PG::prepare_write(pg_info_t &info, } pglog.write_log_and_missing( t, &km, coll_ref->get_cid(), pgmeta_oid, - peering_state.get_pool().info.require_rollback()); + peering_state.get_pgpool().info.require_rollback()); if (!km.empty()) { t.omap_setkeys(coll_ref->get_cid(), pgmeta_oid, km); } @@ -628,7 +628,7 @@ PG::do_osd_ops_execute( // check for full if ((ox->delta_stats.num_bytes > 0 || ox->delta_stats.num_objects > 0) && - get_pool().info.has_flag(pg_pool_t::FLAG_FULL)) { + get_pgpool().info.has_flag(pg_pool_t::FLAG_FULL)) { const auto& m = ox->get_message(); if (m.get_reqid().name.is_mds() || // FIXME: ignore MDS for now m.has_flag(CEPH_OSD_FLAG_FULL_FORCE)) { @@ -636,7 +636,7 @@ PG::do_osd_ops_execute( } else if (m.has_flag(CEPH_OSD_FLAG_FULL_TRY)) { // they tried, they failed. logger().info(" full, replying to FULL_TRY op"); - if (get_pool().info.has_flag(pg_pool_t::FLAG_FULL_QUOTA)) + if (get_pgpool().info.has_flag(pg_pool_t::FLAG_FULL_QUOTA)) return interruptor::make_ready_future( seastar::now(), OpsExecuter::osd_op_ierrorator::future<>( diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index a5398b3df57..77ffdbbde2c 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -461,8 +461,8 @@ public: return get_info().history.same_interval_since; } - const auto& get_pool() const { - return peering_state.get_pool(); + const auto& get_pgpool() const { + return peering_state.get_pgpool(); } pg_shard_t get_primary() const { return peering_state.get_primary(); @@ -701,7 +701,7 @@ public: interruptible_future> already_complete(const osd_reqid_t& reqid); int get_recovery_op_priority() const { int64_t pri = 0; - get_pool().info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri); + get_pgpool().info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri); return pri > 0 ? pri : crimson::common::local_conf()->osd_recovery_op_priority; } seastar::future<> mark_unfound_lost(int) { diff --git a/src/include/types_fmt.h b/src/include/types_fmt.h index 96392de2e5e..5f33248a983 100644 --- a/src/include/types_fmt.h +++ b/src/include/types_fmt.h @@ -43,6 +43,22 @@ struct fmt::formatter> { } }; +template +struct fmt::formatter> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const std::vector& l, FormatContext& ctx) + { + std::string_view sep = "["; + for (const auto& e : l) { + fmt::format_to(ctx.out(), "{}{}", sep, e); + sep = ","; + } + return fmt::format_to(ctx.out(), "]"); + } +}; + template struct fmt::formatter> { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index 296c1202afa..d710aacb7de 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -675,7 +675,8 @@ public: ScrubMap &map, ScrubMapBuilder &pos, ScrubMap::object &o) override; - uint64_t be_get_ondisk_size(uint64_t logical_size) override { + + uint64_t be_get_ondisk_size(uint64_t logical_size) const final { return sinfo.logical_to_next_chunk_offset(logical_size); } void _failed_push(const hobject_t &hoid, diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 6c437312e05..ef55e440b06 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -9140,7 +9140,7 @@ void OSD::split_pgs( *i, split_bits, i->ps(), - &child->get_pool().info, + &child->get_pgpool().info, rctx.transaction); parent->split_into( i->pgid, diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 3f14c805948..04b4a1e1206 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -215,7 +215,7 @@ PG::PG(OSDService *o, OSDMapRef curmap, curmap, this, this), - pool(recovery_state.get_pool()), + pool(recovery_state.get_pgpool()), info(recovery_state.get_info()) { #ifdef PG_DEBUG_REFS diff --git a/src/osd/PG.h b/src/osd/PG.h index 753d26dd274..889fe96c76b 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -163,18 +163,9 @@ class PGRecoveryStats { * */ -/// Facilitating scrub-realated object access to private PG data -class ScrubberPasskey { -private: - friend class Scrub::ReplicaReservations; - friend class PrimaryLogScrub; - friend class PgScrubber; - ScrubberPasskey() {} - ScrubberPasskey(const ScrubberPasskey&) = default; - ScrubberPasskey& operator=(const ScrubberPasskey&) = delete; -}; - -class PG : public DoutPrefixProvider, public PeeringState::PeeringListener { +class PG : public DoutPrefixProvider, + public PeeringState::PeeringListener, + public Scrub::PgScrubBeListener { friend struct NamedState; friend class PeeringState; friend class PgScrubber; @@ -245,7 +236,7 @@ public: return pg_id; } - const PGPool& get_pool() const { + const PGPool& get_pgpool() const final { return pool; } uint64_t get_last_user_version() const { @@ -261,6 +252,11 @@ public: return info.history.same_interval_since; } + bool is_waiting_for_unreadable_object() const final + { + return !waiting_for_unreadable_object.empty(); + } + static void set_last_scrub_stamp( utime_t t, pg_history_t &history, pg_stat_t &stats) { stats.last_scrub_stamp = t; @@ -346,7 +342,7 @@ public: int get_acting_primary() const { return recovery_state.get_acting_primary(); } - pg_shard_t get_primary() const { + pg_shard_t get_primary() const final { return recovery_state.get_primary(); } const std::vector get_up() const { @@ -1408,20 +1404,29 @@ protected: const pg_info_t &info; -// ScrubberPasskey getters: +// ScrubberPasskey getters/misc: public: - const pg_info_t& get_pg_info(ScrubberPasskey) const { - return info; - } - - OSDService* get_pg_osd(ScrubberPasskey) const { - return osd; - } - - requested_scrub_t& get_planned_scrub(ScrubberPasskey) { - return m_planned_scrub; - } - + const pg_info_t& get_pg_info(ScrubberPasskey) const final { return info; } + + OSDService* get_pg_osd(ScrubberPasskey) const { return osd; } + + requested_scrub_t& get_planned_scrub(ScrubberPasskey) + { + return m_planned_scrub; + } + + void force_object_missing(ScrubberPasskey, + const std::set& peer, + const hobject_t& oid, + eversion_t version) final + { + recovery_state.force_object_missing(peer, oid, version); + } + + uint64_t logical_to_ondisk_size(uint64_t logical_size) const final + { + return get_pgbackend()->be_get_ondisk_size(logical_size); + } }; #endif diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 945a76eaa9b..29e969f1b24 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -586,7 +586,8 @@ typedef std::shared_ptr OSDMapRef; ScrubMapBuilder &pos); virtual uint64_t be_get_ondisk_size( - uint64_t logical_size) = 0; + uint64_t logical_size) const = 0; + virtual int be_deep_scrub( const hobject_t &oid, ScrubMap &map, diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 3cec0f2daa3..cdf762b7816 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -20,6 +20,7 @@ #include "PGStateUtils.h" #include "PGPeeringEvent.h" #include "osd_types.h" +#include "osd_types_fmt.h" #include "os/ObjectStore.h" #include "OSDMap.h" #include "MissingLoc.h" @@ -57,6 +58,22 @@ struct PGPool { } }; +template <> +struct fmt::formatter { + template + constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } + + template + auto format(const PGPool& pool, FormatContext& ctx) + { + return fmt::format_to(ctx.out(), + "{}/{}({})", + pool.id, + pool.name, + pool.info); + } +}; + struct PeeringCtx; // [primary only] content recovery state @@ -2136,7 +2153,7 @@ public: return last_rollback_info_trimmed_to_applied; } /// Returns stable reference to internal pool structure - const PGPool &get_pool() const { + const PGPool &get_pgpool() const { return pool; } /// Returns reference to current osdmap diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h index b1cfe4dd270..f5d29fbdfc8 100644 --- a/src/osd/ReplicatedBackend.h +++ b/src/osd/ReplicatedBackend.h @@ -431,7 +431,10 @@ private: ScrubMap &map, ScrubMapBuilder &pos, ScrubMap::object &o) override; - uint64_t be_get_ondisk_size(uint64_t logical_size) override { return logical_size; } + + uint64_t be_get_ondisk_size(uint64_t logical_size) const final { + return logical_size; + } }; #endif diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index c4739e640c8..56f94c9e8e9 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -565,9 +565,9 @@ ScrubQueue::sched_params_t PgScrubber::determine_scrub_time( } else { res.proposed_time = m_pg->info.history.last_scrub_stamp; res.min_interval = - m_pg->get_pool().info.opts.value_or(pool_opts_t::SCRUB_MIN_INTERVAL, 0.0); + m_pg->get_pgpool().info.opts.value_or(pool_opts_t::SCRUB_MIN_INTERVAL, 0.0); res.max_interval = - m_pg->get_pool().info.opts.value_or(pool_opts_t::SCRUB_MAX_INTERVAL, 0.0); + m_pg->get_pgpool().info.opts.value_or(pool_opts_t::SCRUB_MAX_INTERVAL, 0.0); } dout(15) << __func__ << " suggested: " << res.proposed_time @@ -1022,7 +1022,6 @@ void PgScrubber::on_init() m_be = std::make_unique( *this, - *(m_pg->get_pgbackend()), *m_pg, m_pg_whoami, m_is_repair, @@ -1048,7 +1047,6 @@ void PgScrubber::on_replica_init() { m_be = std::make_unique( *this, - *(m_pg->get_pgbackend()), *m_pg, m_pg_whoami, m_is_repair, @@ -1191,13 +1189,61 @@ int PgScrubber::build_scrub_map_chunk(ScrubMap& map, // finish dout(20) << __func__ << " finishing" << dendl; ceph_assert(pos.done()); - m_be->repair_oinfo_oid(map); + repair_oinfo_oid(map); dout(20) << __func__ << " done, got " << map.objects.size() << " items" << dendl; return 0; } +/// \todo consider moving repair_oinfo_oid() back to the backend +void PgScrubber::repair_oinfo_oid(ScrubMap& smap) +{ + for (auto i = smap.objects.rbegin(); i != smap.objects.rend(); ++i) { + + const hobject_t& hoid = i->first; + ScrubMap::object& o = i->second; + + if (o.attrs.find(OI_ATTR) == o.attrs.end()) { + continue; + } + bufferlist bl; + bl.push_back(o.attrs[OI_ATTR]); + object_info_t oi; + try { + oi.decode(bl); + } catch (...) { + continue; + } + + if (oi.soid != hoid) { + ObjectStore::Transaction t; + OSDriver::OSTransaction _t(m_pg->osdriver.get_transaction(&t)); + + m_osds->clog->error() + << "osd." << m_pg_whoami << " found object info error on pg " << m_pg_id + << " oid " << hoid << " oid in object info: " << oi.soid + << "...repaired"; + // Fix object info + oi.soid = hoid; + bl.clear(); + encode(oi, + bl, + m_pg->get_osdmap()->get_features(CEPH_ENTITY_TYPE_OSD, nullptr)); + + bufferptr bp(bl.c_str(), bl.length()); + o.attrs[OI_ATTR] = bp; + + t.setattr(m_pg->coll, ghobject_t(hoid), OI_ATTR, bl); + int r = m_pg->osd->store->queue_transaction(m_pg->ch, std::move(t)); + if (r != 0) { + derr << __func__ << ": queue_transaction got " << cpp_strerror(r) + << dendl; + } + } + } +} + void PgScrubber::run_callbacks() { @@ -2282,8 +2328,9 @@ const OSDMapRef& PgScrubber::get_osdmap() const return m_pg->get_osdmap(); } -ostream& operator<<(ostream& out, const PgScrubber& scrubber) -{ +LoggerSinkSet& PgScrubber::get_logger() const { return*m_osds->clog.get(); } + +ostream &operator<<(ostream &out, const PgScrubber &scrubber) { return out << scrubber.m_flags; } diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 6bbb61835a7..295dd445737 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -272,8 +272,9 @@ ostream& operator<<(ostream& out, const scrub_flags_t& sf); * the actual scrubbing code. */ class PgScrubber : public ScrubPgIF, - public ScrubMachineListener, - public SnapMapperAccessor { + public ScrubMachineListener, + public SnapMapperAccessor, + public ScrubBeListener { public: explicit PgScrubber(PG* pg); @@ -567,23 +568,33 @@ class PgScrubber : public ScrubPgIF, ostream& show(ostream& out) const override; public: - // ------------------ the I/F used by the ScrubBackend (not named yet) + // ------------------ the I/F used by the ScrubBackend (ScrubBeListener) // note: the reason we must have these forwarders, is because of the // artificial PG vs. PrimaryLogPG distinction. Some of the services used // by the scrubber backend are PrimaryLog-specific. - virtual void add_to_stats(const object_stat_sum_t& stat) + void add_to_stats(const object_stat_sum_t& stat) override { ceph_assert(0 && "expecting a PrimaryLogScrub object"); } - virtual void submit_digest_fixes(const digests_fixes_t& fixes) + void submit_digest_fixes(const digests_fixes_t& fixes) override { ceph_assert(0 && "expecting a PrimaryLogScrub object"); } - // ------------------------------------------------------------------------------------- + CephContext* get_pg_cct() const final { return m_pg->cct; } + + LoggerSinkSet& get_logger() const final; + + spg_t get_pgid() const final { return m_pg->get_pgid(); } + + /// Returns reference to current osdmap + const OSDMapRef& get_osdmap() const final; + + + // --------------------------------------------------------------------------- friend ostream& operator<<(ostream& out, const PgScrubber& scrubber); @@ -706,6 +717,8 @@ class PgScrubber : public ScrubPgIF, epoch_t m_interval_start{0}; ///< interval's 'from' of when scrubbing was ///< first scheduled + void repair_oinfo_oid(ScrubMap& smap); + /* * the exact epoch when the scrubbing actually started (started here - cleared * checks for no-scrub conf). Incoming events are verified against this, with @@ -768,14 +781,9 @@ class PgScrubber : public ScrubPgIF, int num_digest_updates_pending{0}; hobject_t m_start, m_end; ///< note: half-closed: [start,end) - /// Returns reference to current osdmap - const OSDMapRef& get_osdmap() const; - /// Returns epoch of current osdmap epoch_t get_osdmap_epoch() const { return get_osdmap()->get_epoch(); } - CephContext* get_pg_cct() const { return m_pg->cct; } - // collected statistics int m_shallow_errors{0}; int m_deep_errors{0}; diff --git a/src/osd/scrubber/scrub_backend.cc b/src/osd/scrubber/scrub_backend.cc index aa7427f4248..70b3eed9ed0 100644 --- a/src/osd/scrubber/scrub_backend.cc +++ b/src/osd/scrubber/scrub_backend.cc @@ -25,7 +25,7 @@ using namespace std::chrono; using namespace std::chrono_literals; using namespace std::literals; -#define dout_context (m_scrubber.m_osds->cct) +#define dout_context (m_scrubber.get_pg_cct()) #define dout_subsys ceph_subsys_osd #undef dout_prefix @@ -40,24 +40,22 @@ std::ostream& ScrubBackend::logger_prefix(std::ostream* out, // ////////////////////////////////////////////////////////////////////////// // // for a Primary -ScrubBackend::ScrubBackend(PgScrubber& scrubber, - PGBackend& backend, - PG& pg, +ScrubBackend::ScrubBackend(ScrubBeListener& scrubber, + PgScrubBeListener& pg, pg_shard_t i_am, bool repair, scrub_level_t shallow_or_deep, const std::set& acting) : m_scrubber{scrubber} - , m_pgbe{backend} , m_pg{pg} , m_pg_whoami{i_am} , m_repair{repair} , m_depth{shallow_or_deep} - , m_pg_id{scrubber.m_pg_id} - , m_pool{m_pg.get_pool()} + , m_pg_id{scrubber.get_pgid()} + , m_pool{m_pg.get_pgpool()} , m_incomplete_clones_allowed{m_pool.info.allow_incomplete_clones()} , m_conf{m_scrubber.get_pg_cct()->_conf} - , clog{m_scrubber.m_osds->clog} + , clog{m_scrubber.get_logger()} { m_formatted_id = m_pg_id.calc_name_sring(); @@ -74,22 +72,20 @@ ScrubBackend::ScrubBackend(PgScrubber& scrubber, } // for a Replica -ScrubBackend::ScrubBackend(PgScrubber& scrubber, - PGBackend& backend, - PG& pg, +ScrubBackend::ScrubBackend(ScrubBeListener& scrubber, + PgScrubBeListener& pg, pg_shard_t i_am, bool repair, scrub_level_t shallow_or_deep) : m_scrubber{scrubber} - , m_pgbe{backend} , m_pg{pg} , m_pg_whoami{i_am} , m_repair{repair} , m_depth{shallow_or_deep} - , m_pg_id{scrubber.m_pg_id} - , m_pool{m_pg.get_pool()} + , m_pg_id{scrubber.get_pgid()} + , m_pool{m_pg.get_pgpool()} , m_conf{m_scrubber.get_pg_cct()->_conf} - , clog{m_scrubber.m_osds->clog} + , clog{m_scrubber.get_logger()} { m_formatted_id = m_pg_id.calc_name_sring(); m_is_replicated = m_pool.info.is_replicated(); @@ -100,7 +96,7 @@ ScrubBackend::ScrubBackend(PgScrubber& scrubber, uint64_t ScrubBackend::logical_to_ondisk_size(uint64_t logical_size) const { - return m_pgbe.be_get_ondisk_size(logical_size); + return m_pg.logical_to_ondisk_size(logical_size); } void ScrubBackend::update_repair_status(bool should_repair) @@ -268,7 +264,7 @@ void ScrubBackend::omap_checks() if (!wss.str().empty()) { dout(5) << __func__ << ": " << wss.str() << dendl; - clog->warn(wss); + clog.warn(wss); } } @@ -311,53 +307,6 @@ void ScrubBackend::update_authoritative() } } -void ScrubBackend::repair_oinfo_oid(ScrubMap& smap) -{ - for (auto i = smap.objects.rbegin(); i != smap.objects.rend(); ++i) { - - const hobject_t& hoid = i->first; - ScrubMap::object& o = i->second; - - if (o.attrs.find(OI_ATTR) == o.attrs.end()) { - continue; - } - bufferlist bl; - bl.push_back(o.attrs[OI_ATTR]); - object_info_t oi; - try { - oi.decode(bl); - } catch (...) { - continue; - } - - if (oi.soid != hoid) { - ObjectStore::Transaction t; - OSDriver::OSTransaction _t(m_pg.osdriver.get_transaction(&t)); - - clog->error() << "osd." << m_pg_whoami - << " found object info error on pg " << m_pg_id - << " oid " << hoid << " oid in object info: " << oi.soid - << "...repaired"; - // Fix object info - oi.soid = hoid; - bl.clear(); - encode(oi, - bl, - m_pg.get_osdmap()->get_features(CEPH_ENTITY_TYPE_OSD, nullptr)); - - bufferptr bp(bl.c_str(), bl.length()); - o.attrs[OI_ATTR] = bp; - - t.setattr(m_pg.coll, ghobject_t(hoid), OI_ATTR, bl); - int r = m_pg.osd->store->queue_transaction(m_pg.ch, std::move(t)); - if (r != 0) { - derr << __func__ << ": queue_transaction got " << cpp_strerror(r) - << dendl; - } - } - } -} - int ScrubBackend::scrub_process_inconsistent() { dout(20) << fmt::format("{}: {} (m_repair:{}) good peers tbl #: {}", @@ -379,7 +328,7 @@ int ScrubBackend::scrub_process_inconsistent() m_inconsistent.size()); dout(4) << err_msg << dendl; - clog->error() << err_msg; + clog.error() << err_msg; ceph_assert(m_repair); int fixed_cnt{0}; @@ -438,13 +387,13 @@ void ScrubBackend::repair_object(const hobject_t& soid, if (bad_peers.count(m_pg.get_primary())) { // We should only be scrubbing if the PG is clean. - ceph_assert(m_pg.waiting_for_unreadable_object.empty()); + ceph_assert(!m_pg.is_waiting_for_unreadable_object()); dout(10) << __func__ << ": primary = " << m_pg.get_primary() << dendl; } // No need to pass ok_peers, they must not be missing the object, so // force_object_missing will add them to missing_loc anyway - m_pg.recovery_state.force_object_missing(bad_peers, soid, oi.version); + m_pg.force_object_missing(ScrubberPasskey{}, bad_peers, soid, oi.version); } @@ -811,7 +760,7 @@ void ScrubBackend::compare_smaps() [this](const auto& ho) { if (auto maybe_clust_err = compare_obj_in_maps(ho); maybe_clust_err) { - clog->error() << *maybe_clust_err; + clog.error() << *maybe_clust_err; } }); } @@ -829,7 +778,7 @@ std::optional ScrubBackend::compare_obj_in_maps( if (candidates_errors.str().size()) { // a collection of shard-specific errors detected while // finding the best shard to serve as authoritative - clog->error() << candidates_errors.str(); + clog.error() << candidates_errors.str(); } inconsistent_obj_wrapper object_error{ho}; @@ -851,7 +800,7 @@ std::optional ScrubBackend::compare_obj_in_maps( this_chunk->m_inconsistent_objs.push_back(std::move(object_error)); return fmt::format("{} soid {} : failed to pick suitable object info\n", - m_scrubber.m_pg_id.pgid, + m_scrubber.get_pgid().pgid, ho); } @@ -1449,7 +1398,8 @@ static inline bool doing_clones( void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) { dout(10) << __func__ << " num stat obj " - << m_pg.info.stats.stats.sum.num_objects << dendl; + << m_pg.get_pg_info(ScrubberPasskey{}).stats.stats.sum.num_objects + << dendl; std::optional all_clones; // Unspecified snapid_t or std::nullopt @@ -1483,7 +1433,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) std::optional oi; if (!p->second.attrs.count(OI_ATTR)) { oi = std::nullopt; - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : no '" << OI_ATTR << "' attr"; this_chunk->m_error_counts.shallow_errors++; soid_error.set_info_missing(); @@ -1494,7 +1444,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) oi = object_info_t(bv); } catch (ceph::buffer::error& e) { oi = std::nullopt; - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : can't decode '" << OI_ATTR << "' attr " << e.what(); this_chunk->m_error_counts.shallow_errors++; @@ -1505,7 +1455,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) if (oi) { if (logical_to_ondisk_size(oi->size) != p->second.size) { - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : on disk size (" << p->second.size << ") does not match object info size (" << oi->size << ") adjusted for ondisk to (" @@ -1573,11 +1523,11 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) if (!expected) { // If we couldn't read the head's snapset, just ignore clones if (head && !snapset) { - clog->error() << m_mode_desc << " " << m_pg_id << " TTTTT:" << m_pg_id + clog.error() << m_mode_desc << " " << m_pg_id << " TTTTT:" << m_pg_id << " " << soid << " : clone ignored due to missing snapset"; } else { - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : is an unexpected clone"; } this_chunk->m_error_counts.shallow_errors++; @@ -1611,7 +1561,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) << dendl; if (p->second.attrs.count(SS_ATTR) == 0) { - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : no '" << SS_ATTR << "' attr"; this_chunk->m_error_counts.shallow_errors++; snapset = std::nullopt; @@ -1626,7 +1576,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) head_error.ss_bl.push_back(p->second.attrs[SS_ATTR]); } catch (ceph::buffer::error& e) { snapset = std::nullopt; - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : can't decode '" << SS_ATTR << "' attr " << e.what(); this_chunk->m_error_counts.shallow_errors++; @@ -1641,7 +1591,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) if (!snapset->clones.empty()) { dout(20) << " snapset " << *snapset << dendl; if (snapset->seq == 0) { - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : snaps.seq not set"; this_chunk->m_error_counts.shallow_errors++; head_error.set_snapset_error(); @@ -1658,13 +1608,13 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) << dendl; if (snapset->clone_size.count(soid.snap) == 0) { - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : is missing in clone_size"; this_chunk->m_error_counts.shallow_errors++; soid_error.set_size_mismatch(); } else { if (oi && oi->size != snapset->clone_size[soid.snap]) { - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : size " << oi->size << " != clone_size " << snapset->clone_size[*curclone]; this_chunk->m_error_counts.shallow_errors++; @@ -1672,7 +1622,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) } if (snapset->clone_overlap.count(soid.snap) == 0) { - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : is missing in clone_overlap"; this_chunk->m_error_counts.shallow_errors++; soid_error.set_size_mismatch(); @@ -1696,7 +1646,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) } if (bad_interval_set) { - clog->error() << m_mode_desc << " " << m_pg_id << " " << soid + clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : bad interval_set in clone_overlap"; this_chunk->m_error_counts.shallow_errors++; soid_error.set_size_mismatch(); @@ -1761,7 +1711,7 @@ int ScrubBackend::process_clones_to( // skip higher-numbered clones in the list. if (!m_incomplete_clones_allowed) { next_clone.snap = **curclone; - clog->error() << m_mode_desc << " " << m_pg_id << " " << *head + clog.error() << m_mode_desc << " " << m_pg_id << " " << *head << " : expected clone " << next_clone << " " << m_missing << " missing"; this_chunk->m_error_counts.shallow_errors++; @@ -1783,7 +1733,7 @@ void ScrubBackend::log_missing(int missing, << *head << " skipped " << missing << " clone(s) in cache tier" << dendl; } else { - clog->info() << m_mode_desc << " " << m_pg_id << " " << *head << " : " + clog.info() << m_mode_desc << " " << m_pg_id << " " << *head << " : " << missing << " missing clone(s)"; } } diff --git a/src/osd/scrubber/scrub_backend.h b/src/osd/scrubber/scrub_backend.h index 325cf4e6a03..81259e5fb81 100644 --- a/src/osd/scrubber/scrub_backend.h +++ b/src/osd/scrubber/scrub_backend.h @@ -46,15 +46,18 @@ #include #include "common/LogClient.h" +#include "osd/OSDMap.h" #include "common/scrub_types.h" +#include "osd/osd_types_fmt.h" + +#include "osd/scrubber_common.h" struct ScrubMap; class PG; class PgScrubber; -class PGBackend; -class PGPool; - +struct PGPool; +using Scrub::PgScrubBeListener; using data_omap_digests_t = std::pair, std::optional>; @@ -83,6 +86,20 @@ struct error_counters_t { int deep_errors{0}; }; +// the PgScrubber services used by the backend +struct ScrubBeListener { + virtual std::ostream& gen_prefix(std::ostream& out) const = 0; + virtual CephContext* get_pg_cct() const = 0; + virtual LoggerSinkSet& get_logger() const = 0; + virtual bool is_primary() const = 0; + virtual spg_t get_pgid() const = 0; + virtual const OSDMapRef& get_osdmap() const = 0; + virtual void add_to_stats(const object_stat_sum_t& stat) = 0; + virtual void submit_digest_fixes(const digests_fixes_t& fixes) = 0; + virtual ~ScrubBeListener() = default; +}; + + /* * snaps-related aux structures: * the scrub-backend scans the snaps associated with each scrubbed object, and @@ -301,23 +318,22 @@ struct scrub_chunk_t { class ScrubBackend { public: // Primary constructor - ScrubBackend(PgScrubber& scrubber, - PGBackend& backend, - PG& pg, + ScrubBackend(ScrubBeListener& scrubber, + PgScrubBeListener& pg, pg_shard_t i_am, bool repair, scrub_level_t shallow_or_deep, const std::set& acting); // Replica constructor: no primary map - ScrubBackend(PgScrubber& scrubber, - PGBackend& backend, - PG& pg, + ScrubBackend(ScrubBeListener& scrubber, + PgScrubBeListener& pg, pg_shard_t i_am, bool repair, scrub_level_t shallow_or_deep); friend class PgScrubber; + friend class TestScrubBackend; /** * reset the per-chunk data structure (scrub_chunk_t). @@ -355,8 +371,6 @@ class ScrubBackend { int scrub_process_inconsistent(); - void repair_oinfo_oid(ScrubMap& smap); - const omap_stat_t& this_scrub_omapstats() const { return m_omap_stats; } int authoritative_peers_count() const { return m_auth_peers.size(); }; @@ -365,9 +379,8 @@ class ScrubBackend { private: // set/constructed at the ctor(): - PgScrubber& m_scrubber; - PGBackend& m_pgbe; - PG& m_pg; + ScrubBeListener& m_scrubber; + Scrub::PgScrubBeListener& m_pg; const pg_shard_t m_pg_whoami; bool m_repair; const scrub_level_t m_depth; @@ -387,7 +400,7 @@ class ScrubBackend { // shorthands: ConfigProxy& m_conf; - LogChannelRef clog; + LoggerSinkSet& clog; private: @@ -534,3 +547,39 @@ class ScrubBackend { // accessing the PG backend for this translation service uint64_t logical_to_ondisk_size(uint64_t logical_size) const; }; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const data_omap_digests_t& dg, FormatContext& ctx) + { + // can't use value_or() due to different output types + if (std::get<0>(dg).has_value()) { + fmt::format_to(ctx.out(), "[{:#x}/", std::get<0>(dg).value()); + } else { + fmt::format_to(ctx.out(), "[---/"); + } + if (std::get<1>(dg).has_value()) { + return fmt::format_to(ctx.out(), "{:#x}]", std::get<1>(dg).value()); + } else { + return fmt::format_to(ctx.out(), "---]"); + } + } +}; + +template <> +struct fmt::formatter> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const std::pair& x, + FormatContext& ctx) + { + return fmt::format_to(ctx.out(), + "{{ {} - {} }}", + std::get<0>(x), + std::get<1>(x)); + } +}; diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 7825b4814ca..7cdf714db42 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -12,6 +12,24 @@ namespace ceph { class Formatter; } +struct PGPool; + +namespace Scrub { + class ReplicaReservations; +} + +/// Facilitating scrub-realated object access to private PG data +class ScrubberPasskey { +private: + friend class Scrub::ReplicaReservations; + friend class PrimaryLogScrub; + friend class PgScrubber; + friend class ScrubBackend; + ScrubberPasskey() {} + ScrubberPasskey(const ScrubberPasskey&) = default; + ScrubberPasskey& operator=(const ScrubberPasskey&) = delete; +}; + namespace Scrub { /// high/low OP priority @@ -29,6 +47,25 @@ struct ScrubPreconds { bool only_deadlined{false}; }; +/// PG services used by the scrubber backend +struct PgScrubBeListener { + virtual ~PgScrubBeListener() = default; + + virtual const PGPool& get_pgpool() const = 0; + virtual pg_shard_t get_primary() const = 0; + virtual void force_object_missing(ScrubberPasskey, + const std::set& peer, + const hobject_t& oid, + eversion_t version) = 0; + virtual const pg_info_t& get_pg_info(ScrubberPasskey) const = 0; + + // query the PG backend for the on-disk size of an object + virtual uint64_t logical_to_ondisk_size(uint64_t logical_size) const = 0; + + // used to verify our "cleaness" before scrubbing + virtual bool is_waiting_for_unreadable_object() const = 0; +}; + } // namespace Scrub -- 2.39.5