From 4e470cb09a1e35a822729959ae02d9106569b95b Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Tue, 15 Apr 2025 03:34:06 -0500 Subject: [PATCH] osd/scrub: count scrub I/O Implement I/O counting in PGBackend::be_scan_list() and the relevant functions it calls. Signed-off-by: Ronen Friedman --- src/osd/ECBackend.cc | 4 ++++ src/osd/ECBackend.h | 1 + src/osd/ECBackendL.cc | 4 ++++ src/osd/ECBackendL.h | 1 + src/osd/ECSwitch.h | 10 ++++++---- src/osd/PGBackend.cc | 6 +++++- src/osd/PGBackend.h | 4 ++++ src/osd/ReplicatedBackend.cc | 21 ++++++++++++++++----- src/osd/ReplicatedBackend.h | 1 + src/osd/scrubber/pg_scrubber.cc | 4 ++-- 10 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 88e985c77af6f..41fde862bc8f5 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -1767,6 +1767,7 @@ int ECBackend::objects_get_attrs( } int ECBackend::be_deep_scrub( + const Scrub::ScrubCounterSet& io_counters, const hobject_t &poid, ScrubMap &map, ScrubMapBuilder &pos, @@ -1793,6 +1794,8 @@ int ECBackend::be_deep_scrub( if (stride % sinfo.get_chunk_size()) stride += sinfo.get_chunk_size() - (stride % sinfo.get_chunk_size()); + auto& perf_logger = *(get_parent()->get_logger()); + perf_logger.inc(io_counters.read_cnt); bufferlist bl; r = switcher->store->read( switcher->ch, @@ -1817,6 +1820,7 @@ int ECBackend::be_deep_scrub( if (r > 0) { pos.data_hash << bl; } + perf_logger.inc(io_counters.read_bytes, r); pos.data_pos += r; if (r == (int)stride) { return -EINPROGRESS; diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index 82b89f3e4afe2..510e53cfbd04a 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -507,6 +507,7 @@ class ECBackend : public ECCommon { bool auto_repair_supported() const { return true; } int be_deep_scrub( + const Scrub::ScrubCounterSet& io_counters, const hobject_t &poid, ScrubMap &map, ScrubMapBuilder &pos, diff --git a/src/osd/ECBackendL.cc b/src/osd/ECBackendL.cc index
bc299874a72ac..79666f9370c3c 100644 --- a/src/osd/ECBackendL.cc +++ b/src/osd/ECBackendL.cc @@ -1727,6 +1727,7 @@ int ECBackendL::objects_get_attrs( } int ECBackendL::be_deep_scrub( + const Scrub::ScrubCounterSet& io_counters, const hobject_t &poid, ScrubMap &map, ScrubMapBuilder &pos, @@ -1754,6 +1755,8 @@ int ECBackendL::be_deep_scrub( if (stride % sinfo.get_chunk_size()) stride += sinfo.get_chunk_size() - (stride % sinfo.get_chunk_size()); + auto& perf_logger = *(get_parent()->get_logger()); + perf_logger.inc(io_counters.read_cnt); bufferlist bl; r = switcher->store->read( switcher->ch, @@ -1778,6 +1781,7 @@ int ECBackendL::be_deep_scrub( if (r > 0) { pos.data_hash << bl; } + perf_logger.inc(io_counters.read_bytes, r); pos.data_pos += r; if (r == (int)stride) { return -EINPROGRESS; diff --git a/src/osd/ECBackendL.h b/src/osd/ECBackendL.h index 6a578057acd85..cd0a1e846aa56 100644 --- a/src/osd/ECBackendL.h +++ b/src/osd/ECBackendL.h @@ -436,6 +436,7 @@ public: bool auto_repair_supported() const { return true; } int be_deep_scrub( + const Scrub::ScrubCounterSet& io_counters, const hobject_t &poid, ScrubMap &map, ScrubMapBuilder &pos, diff --git a/src/osd/ECSwitch.h b/src/osd/ECSwitch.h index c643b7e5cd40a..0ee16181d0381 100644 --- a/src/osd/ECSwitch.h +++ b/src/osd/ECSwitch.h @@ -301,13 +301,15 @@ public: return legacy.be_get_ondisk_size(logical_size); } - int be_deep_scrub(const hobject_t &oid, ScrubMap &map, ScrubMapBuilder &pos - , ScrubMap::object &o) + int be_deep_scrub( + const Scrub::ScrubCounterSet &io_counters, + const hobject_t &oid, ScrubMap &map, ScrubMapBuilder &pos, + ScrubMap::object &o) override { if (is_optimized()) { - return optimized.be_deep_scrub(oid, map, pos, o); + return optimized.be_deep_scrub(io_counters, oid, map, pos, o); } - return legacy.be_deep_scrub(oid, map, pos, o); + return legacy.be_deep_scrub(io_counters, oid, map, pos, o); } unsigned get_ec_data_chunk_count() const override diff --git a/src/osd/PGBackend.cc 
b/src/osd/PGBackend.cc index f7eeb225e61b7..ba492a69974dd 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -774,6 +774,7 @@ PGBackend *PGBackend::build_pg_backend( } int PGBackend::be_scan_list( + const Scrub::ScrubCounterSet& io_counters, ScrubMap &map, ScrubMapBuilder &pos) { @@ -781,10 +782,12 @@ int PGBackend::be_scan_list( ceph_assert(!pos.done()); ceph_assert(pos.pos < pos.ls.size()); hobject_t& poid = pos.ls[pos.pos]; + auto& perf_logger = *(get_parent()->get_logger()); int r = 0; ScrubMap::object &o = map.objects[poid]; if (!pos.metadata_done) { + perf_logger.inc(io_counters.stats_cnt); struct stat st; r = store->stat( ch, @@ -794,6 +797,7 @@ int PGBackend::be_scan_list( true); if (r == 0) { + perf_logger.inc(io_counters.getattr_cnt); o.size = st.st_size; ceph_assert(!o.negative); r = store->getattrs( @@ -828,7 +832,7 @@ int PGBackend::be_scan_list( } if (pos.deep) { - r = be_deep_scrub(poid, map, pos, o); + r = be_deep_scrub(io_counters, poid, map, pos, o); if (r == -EINPROGRESS) { return -EINPROGRESS; } else if (r != 0) { diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index a69a7df0c22a8..4aa1cc3630811 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -27,6 +27,7 @@ #include "common/WorkQueue.h" #include "include/Context.h" #include "os/ObjectStore.h" +#include "osd/scrubber_common.h" #include "common/LogClient.h" #include #include "PGTransaction.h" @@ -599,7 +600,9 @@ typedef std::shared_ptr OSDMapRef; Context *on_complete, bool fast_read = false) = 0; virtual bool auto_repair_supported() const = 0; + int be_scan_list( + const Scrub::ScrubCounterSet& io_counters, ScrubMap &map, ScrubMapBuilder &pos); @@ -607,6 +610,7 @@ typedef std::shared_ptr OSDMapRef; shard_id_t shard_id) const = 0; virtual int be_deep_scrub( + [[maybe_unused]] const Scrub::ScrubCounterSet& io_counters, const hobject_t &oid, ScrubMap &map, ScrubMapBuilder &pos, diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index 
e920bfc04956e..7e3c1fc956dc4 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -624,7 +624,7 @@ void ReplicatedBackend::submit_transaction( pg_committed_to, true, op_t); - + op_t.register_on_commit( parent->bless_context( new C_OSD_OnOpCommit(this, &op))); @@ -745,16 +745,17 @@ static uint32_t crc32_netstring(const uint32_t orig_crc, std::string_view data) } int ReplicatedBackend::be_deep_scrub( + const Scrub::ScrubCounterSet& io_counters, const hobject_t &poid, ScrubMap &map, ScrubMapBuilder &pos, ScrubMap::object &o) { dout(10) << __func__ << " " << poid << " pos " << pos << dendl; - int r; - uint32_t fadvise_flags = CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL | - CEPH_OSD_OP_FLAG_FADVISE_DONTNEED | - CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE; + auto& perf_logger = *(get_parent()->get_logger()); + const uint32_t fadvise_flags = CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL | + CEPH_OSD_OP_FLAG_FADVISE_DONTNEED | + CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE; utime_t sleeptime; sleeptime.set_from_double(cct->_conf->osd_debug_deep_scrub_sleep); @@ -763,6 +764,7 @@ int ReplicatedBackend::be_deep_scrub( sleeptime.sleep(); } + int r{0}; ceph_assert(poid == pos.ls[pos.pos]); if (!pos.data_done()) { if (pos.data_pos == 0) { @@ -771,6 +773,7 @@ int ReplicatedBackend::be_deep_scrub( const uint64_t stride = cct->_conf->osd_deep_scrub_stride; + perf_logger.inc(io_counters.read_cnt); bufferlist bl; r = store->read( ch, @@ -788,6 +791,7 @@ int ReplicatedBackend::be_deep_scrub( if (r > 0) { pos.data_hash << bl; } + perf_logger.inc(io_counters.read_bytes, r); pos.data_pos += r; if (static_cast(r) == stride) { dout(20) << __func__ << " " << poid << " more data, digest so far 0x" @@ -806,6 +810,7 @@ int ReplicatedBackend::be_deep_scrub( if (pos.omap_pos.empty()) { pos.omap_hash = -1; + perf_logger.inc(io_counters.omapgetheader_cnt); bufferlist hdrbl; r = store->omap_get_header( ch, @@ -822,10 +827,13 @@ int ReplicatedBackend::be_deep_scrub( bool encoded = false; dout(25) << "CRC 
header " << cleanbin(hdrbl, encoded, true) << dendl; pos.omap_hash = hdrbl.crc32c(pos.omap_hash); + perf_logger.inc(io_counters.omapgetheader_bytes, hdrbl.length()); } } // omap + + perf_logger.inc(io_counters.omapget_cnt); using omap_iter_seek_t = ObjectStore::omap_iter_seek_t; auto result = store->omap_iterate( ch, @@ -859,6 +867,9 @@ int ReplicatedBackend::be_deep_scrub( return -EINPROGRESS; } + // we have the full omap now. Finalize the perf counting + perf_logger.inc(io_counters.omapget_bytes, pos.omap_bytes); + if (pos.omap_keys > cct->_conf-> osd_deep_scrub_large_omap_object_key_threshold || pos.omap_bytes > cct->_conf-> diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h index 5c22d19a121d6..b52fe71018152 100644 --- a/src/osd/ReplicatedBackend.h +++ b/src/osd/ReplicatedBackend.h @@ -457,6 +457,7 @@ private: int be_deep_scrub( + const Scrub::ScrubCounterSet& io_counters, const hobject_t &poid, ScrubMap &map, ScrubMapBuilder &pos, diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 7583a1cfca642..ef9818a0fef0a 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -1404,8 +1404,8 @@ int PgScrubber::build_scrub_map_chunk(ScrubMap& map, // scan objects while (!pos.done()) { - - int r = m_pg->get_pgbackend()->be_scan_list(map, pos); + int r = + m_pg->get_pgbackend()->be_scan_list(get_unlabeled_counters(), map, pos); dout(30) << __func__ << " BE returned " << r << dendl; if (r == -EINPROGRESS) { dout(20) << __func__ << " in progress" << dendl; -- 2.39.5