git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: Fix stats mismatch cluster error seen during scrubbing
author Jon <jonathan.bailey1@ibm.com>
Mon, 6 Oct 2025 08:32:29 +0000 (09:32 +0100)
committer Jon <jonathan.bailey1@ibm.com>
Mon, 6 Oct 2025 08:32:29 +0000 (09:32 +0100)
Revert "osd: Reduce the amount of status invalidations when rolling shards forwards during peering"

This reverts commit b5cad2694569b7f0eef173f87a7eecb2ddd6b27e.

Fixes: https://tracker.ceph.com/issues/73260
Signed-off-by: Jon Bailey <jonathan.bailey1@ibm.com>
src/osd/PeeringState.cc
src/osd/PeeringState.h

index bb1160d01d8f480b5ba512e2ce9fd46a04950c1b..114c4c34722fccef27b549cb9fceba622ea39684 100644 (file)
@@ -455,8 +455,6 @@ bool PeeringState::proc_replica_notify(const pg_shard_t &from, const pg_notify_t
   psdout(10) << " got osd." << from << " " << oinfo << dendl;
   ceph_assert(is_primary());
   peer_info[from] = oinfo;
-  stats_last_update[from] = oinfo.last_update;
-
   update_peer_info(from, oinfo);
   might_have_unfound.insert(from);
 
@@ -1039,7 +1037,6 @@ void PeeringState::clear_primary_state()
   peer_bytes.clear();
   peer_missing.clear();
   peer_last_complete_ondisk.clear();
-  stats_last_update.clear();
   peer_activated.clear();
   min_last_complete_ondisk = eversion_t();
   pg_trim_to = eversion_t();
@@ -3362,9 +3359,6 @@ void PeeringState::proc_master_log(
   psdout(10) << "proc_master_log for osd." << from << ": "
             << olog << " " << omissing << dendl;
   ceph_assert(!is_peered() && is_primary());
-  stats_last_update[pg_whoami] = info.last_update;
-  psdout(20) << " recording last stats update on " << pg_whoami << ": "
-           << info.last_update << dendl;
 
   if (info.partial_writes_last_complete.contains(from.shard)) {
     apply_pwlc(info.partial_writes_last_complete[from.shard], from, oinfo,
@@ -3472,9 +3466,9 @@ void PeeringState::proc_master_log(
       invalidate_stats = true;
       eversion_t previous_version;
       if (p == pg_log.get_log().log.begin()) {
-             previous_version = pg_log.get_tail();
+       previous_version = pg_log.get_tail();
       } else {
-             previous_version = std::prev(p)->version;
+       previous_version = std::prev(p)->version;
       }
       rollbacker.get()->partial_write(&info, previous_version, *p);
       olog.head = p->version;
@@ -3487,42 +3481,8 @@ void PeeringState::proc_master_log(
   // make any adjustments to their missing map; we are taking their
   // log to be authoritative (i.e., their entries are by definitely
   // non-divergent).
-
-  // Find the version we want to roll forwards to
-  // Iterate over all shards and see if any have a last_update equal to where we want to roll to
-  // Copy the stats for this shard into oinfo
-  // Set invalidate_stats to folse again if we do copy these stats
-  // Verify that this reintroduces the bug (Which is intended for stage 2)
-
-  if (invalidate_stats)
-  {
-    for (const auto& [shard, my_info] : peer_info)
-    {
-      if (invalidate_stats && stats_last_update[shard] == olog.head)
-      {
-        oinfo.stats = my_info.stats;
-        invalidate_stats = false;
-        psdout(10) << "keeping stats for " << shard
-                   << " (wanted last update: " << olog.head
-                   << ", stats last update: " << stats_last_update[shard]
-                   << ", shard last update: " << my_info.last_update << ")."
-                   << dendl;
-      } else {
-        psdout(20) << "not using stats for " << shard
-                   << " (wanted last update: " << olog.head
-                   << ", stats last update: " << stats_last_update[shard]
-                   << ", shard last update: " << my_info.last_update << ")."
-                   << dendl;
-      }
-    }
-  }
-
   merge_log(t, oinfo, std::move(olog), from);
   info.stats.stats_invalid |= invalidate_stats;
-  if (info.stats.stats_invalid)
-  {
-    psdout(10) << "invalidating stats for " << pg_whoami << dendl;
-  }
   peer_info[from] = oinfo;
   psdout(10) << " peer osd." << from << " now " << oinfo
             << " " << omissing << dendl;
@@ -3543,7 +3503,7 @@ void PeeringState::proc_master_log(
   }
   update_history(oinfo.history);
   ceph_assert(cct->_conf->osd_find_best_info_ignore_history_les ||
-    info.last_epoch_started >= info.history.last_epoch_started);
+        info.last_epoch_started >= info.history.last_epoch_started);
 
   peer_missing[from].claim(std::move(omissing));
 }
index 32fa9628a46b239e2f3b4f05279943b3d5be7b7e..c4ee0d9a1f0e178497cadb94ac14f8f259e7da32 100644 (file)
@@ -1509,8 +1509,6 @@ public:
   eversion_t  last_update_applied;  ///< last_update readable
   /// last version to which rollback_info trimming has been applied
   eversion_t  last_rollback_info_trimmed_to_applied;
-  // last version in which the stats for a shard were updated
-  std::map<pg_shard_t,eversion_t> stats_last_update;
 
   /// Counter to determine when pending flushes have completed
   unsigned flushes_in_progress = 0;