From: Jon Date: Mon, 6 Oct 2025 08:32:29 +0000 (+0100) Subject: osd: Fix stats mismatch cluster error seen during scrubbing X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=7031e319158f26e707beb6ad3bd463d7c8bdb005;p=ceph-ci.git osd: Fix stats mismatch cluster error seen during scrubbing Revert "osd: Reduce the amount of status invalidations when rolling shards forwards during peering" This reverts commit b5cad2694569b7f0eef173f87a7eecb2ddd6b27e. Fixes: https://tracker.ceph.com/issues/73260 Signed-off-by: Jon Bailey --- diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index bb1160d01d8..114c4c34722 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -455,8 +455,6 @@ bool PeeringState::proc_replica_notify(const pg_shard_t &from, const pg_notify_t psdout(10) << " got osd." << from << " " << oinfo << dendl; ceph_assert(is_primary()); peer_info[from] = oinfo; - stats_last_update[from] = oinfo.last_update; - update_peer_info(from, oinfo); might_have_unfound.insert(from); @@ -1039,7 +1037,6 @@ void PeeringState::clear_primary_state() peer_bytes.clear(); peer_missing.clear(); peer_last_complete_ondisk.clear(); - stats_last_update.clear(); peer_activated.clear(); min_last_complete_ondisk = eversion_t(); pg_trim_to = eversion_t(); @@ -3362,9 +3359,6 @@ void PeeringState::proc_master_log( psdout(10) << "proc_master_log for osd." 
<< from << ": " << olog << " " << omissing << dendl; ceph_assert(!is_peered() && is_primary()); - stats_last_update[pg_whoami] = info.last_update; - psdout(20) << " recording last stats update on " << pg_whoami << ": " - << info.last_update << dendl; if (info.partial_writes_last_complete.contains(from.shard)) { apply_pwlc(info.partial_writes_last_complete[from.shard], from, oinfo, @@ -3472,9 +3466,9 @@ void PeeringState::proc_master_log( invalidate_stats = true; eversion_t previous_version; if (p == pg_log.get_log().log.begin()) { - previous_version = pg_log.get_tail(); + previous_version = pg_log.get_tail(); } else { - previous_version = std::prev(p)->version; + previous_version = std::prev(p)->version; } rollbacker.get()->partial_write(&info, previous_version, *p); olog.head = p->version; @@ -3487,42 +3481,8 @@ void PeeringState::proc_master_log( // make any adjustments to their missing map; we are taking their // log to be authoritative (i.e., their entries are by definitely // non-divergent). - - // Find the version we want to roll forwards to - // Iterate over all shards and see if any have a last_update equal to where we want to roll to - // Copy the stats for this shard into oinfo - // Set invalidate_stats to folse again if we do copy these stats - // Verify that this reintroduces the bug (Which is intended for stage 2) - - if (invalidate_stats) - { - for (const auto& [shard, my_info] : peer_info) - { - if (invalidate_stats && stats_last_update[shard] == olog.head) - { - oinfo.stats = my_info.stats; - invalidate_stats = false; - psdout(10) << "keeping stats for " << shard - << " (wanted last update: " << olog.head - << ", stats last update: " << stats_last_update[shard] - << ", shard last update: " << my_info.last_update << ")." - << dendl; - } else { - psdout(20) << "not using stats for " << shard - << " (wanted last update: " << olog.head - << ", stats last update: " << stats_last_update[shard] - << ", shard last update: " << my_info.last_update << ")." 
- << dendl; - } - } - } - merge_log(t, oinfo, std::move(olog), from); info.stats.stats_invalid |= invalidate_stats; - if (info.stats.stats_invalid) - { - psdout(10) << "invalidating stats for " << pg_whoami << dendl; - } peer_info[from] = oinfo; psdout(10) << " peer osd." << from << " now " << oinfo << " " << omissing << dendl; @@ -3543,7 +3503,7 @@ void PeeringState::proc_master_log( } update_history(oinfo.history); ceph_assert(cct->_conf->osd_find_best_info_ignore_history_les || - info.last_epoch_started >= info.history.last_epoch_started); + info.last_epoch_started >= info.history.last_epoch_started); peer_missing[from].claim(std::move(omissing)); } diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 32fa9628a46..c4ee0d9a1f0 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -1509,8 +1509,6 @@ public: eversion_t last_update_applied; ///< last_update readable /// last version to which rollback_info trimming has been applied eversion_t last_rollback_info_trimmed_to_applied; - // last version in which the stats for a shard were updated - std::map<pg_shard_t, eversion_t> stats_last_update; /// Counter to determine when pending flushes have completed unsigned flushes_in_progress = 0;