From ba483851c1642573b0a9b4e2f5b6c768175bff6b Mon Sep 17 00:00:00 2001 From: Jon Bailey Date: Wed, 20 Aug 2025 11:11:09 +0100 Subject: [PATCH] osd: Reduce the amount of status invalidations when rolling shards forwards during peering Currently stats invalidations happen during peering when rolling forward shards. We can reduce this so we only invalidate the stats when we don't have any other shards at the version we want to roll the stats forwards to. In the cases where we have a shard with the stats at the correct version, we use those stats instead of invalidating. If we do not have any shards with the correct version of stats, we do the invalidate as before. Signed-off-by: Jon Bailey (cherry picked from commit b5cad2694569b7f0eef173f87a7eecb2ddd6b27e) --- src/osd/PeeringState.cc | 46 ++++++++++++++++++++++++++++++++++++++--- src/osd/PeeringState.h | 2 ++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index c7189b4beef..1963ac03300 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -455,6 +455,8 @@ bool PeeringState::proc_replica_notify(const pg_shard_t &from, const pg_notify_t psdout(10) << " got osd." << from << " " << oinfo << dendl; ceph_assert(is_primary()); peer_info[from] = oinfo; + stats_last_update[from] = oinfo.last_update; + update_peer_info(from, oinfo); might_have_unfound.insert(from); @@ -1037,6 +1039,7 @@ void PeeringState::clear_primary_state() peer_bytes.clear(); peer_missing.clear(); peer_last_complete_ondisk.clear(); + stats_last_update.clear(); peer_activated.clear(); min_last_complete_ondisk = eversion_t(); pg_trim_to = eversion_t(); @@ -3359,6 +3362,9 @@ void PeeringState::proc_master_log( psdout(10) << "proc_master_log for osd." << from << ": " << olog << " " << omissing << dendl; ceph_assert(!is_peered() && is_primary()); + stats_last_update[pg_whoami] = info.last_update; + psdout(20) << " recording last stats update on " << pg_whoami << ": " + << info.last_update << dendl; if (info.partial_writes_last_complete.contains(from.shard)) { apply_pwlc(info.partial_writes_last_complete[from.shard], from, oinfo, @@ -3466,9 +3472,9 @@ void PeeringState::proc_master_log( invalidate_stats = true; eversion_t previous_version; if (p == pg_log.get_log().log.begin()) { - previous_version = pg_log.get_tail(); + previous_version = pg_log.get_tail(); } else { - previous_version = std::prev(p)->version; + previous_version = std::prev(p)->version; } rollbacker.get()->partial_write(&info, previous_version, *p); olog.head = p->version; @@ -3481,8 +3487,42 @@ void PeeringState::proc_master_log( // make any adjustments to their missing map; we are taking their // log to be authoritative (i.e., their entries are by definitely // non-divergent). + + // Find the version we want to roll forwards to + // Iterate over all shards and see if any have a last_update equal to where we want to roll to + // Copy the stats for this shard into oinfo + // Set invalidate_stats to folse again if we do copy these stats + // Verify that this reintroduces the bug (Which is intended for stage 2) + + if (invalidate_stats) + { + for (const auto& [shard, my_info] : peer_info) + { + if (invalidate_stats && stats_last_update[shard] == olog.head) + { + oinfo.stats = my_info.stats; + invalidate_stats = false; + psdout(10) << "keeping stats for " << shard + << " (wanted last update: " << olog.head + << ", stats last update: " << stats_last_update[shard] + << ", shard last update: " << my_info.last_update << ")." + << dendl; + } else { + psdout(20) << "not using stats for " << shard + << " (wanted last update: " << olog.head + << ", stats last update: " << stats_last_update[shard] + << ", shard last update: " << my_info.last_update << ")." + << dendl; + } + } + } + merge_log(t, oinfo, std::move(olog), from); info.stats.stats_invalid |= invalidate_stats; + if (info.stats.stats_invalid) + { + psdout(10) << "invalidating stats for " << pg_whoami << dendl; + } peer_info[from] = oinfo; psdout(10) << " peer osd." << from << " now " << oinfo << " " << omissing << dendl; @@ -3503,7 +3543,7 @@ void PeeringState::proc_master_log( } update_history(oinfo.history); ceph_assert(cct->_conf->osd_find_best_info_ignore_history_les || - info.last_epoch_started >= info.history.last_epoch_started); + info.last_epoch_started >= info.history.last_epoch_started); peer_missing[from].claim(std::move(omissing)); } diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 6fa89017b2e..0d8a001bca1 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -1507,6 +1507,8 @@ public: eversion_t last_update_applied; ///< last_update readable /// last version to which rollback_info trimming has been applied eversion_t last_rollback_info_trimmed_to_applied; + // last version in which the stats for a shard were updated + std::map stats_last_update; /// Counter to determine when pending flushes have completed unsigned flushes_in_progress = 0; -- 2.47.3