]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
src/mon/MgrStatMonitor: fix invalid iterator increment in calc_pool_availability() 64129/head
authorNitzan Mordechai <nmordech@redhat.com>
Wed, 21 May 2025 11:41:01 +0000 (11:41 +0000)
committerShraddha Agrawal <shraddha.agrawal000@gmail.com>
Thu, 26 Jun 2025 13:00:27 +0000 (18:30 +0530)
Erasing entries from `pool_availability` inside a range-for
loop invalidated the hidden iterator, triggering an
“Invalid read” under Valgrind.

- Use `std::erase_if(pool_availability, predicate)` for
  atomic removal.
- Refactor the stats-update loop to use structured bindings
  and a clear `++it` for readability.

Fixes: https://tracker.ceph.com/issues/71271
Signed-off-by: Nitzan Mordechai <nmordech@redhat.com>
(cherry picked from commit 7369a4dded210d9410fb00e259d95df013532cc1)

src/mon/MgrStatMonitor.cc

index 224651a2eb6ee06f2cfdda9b37bedbcb3d8d024d..b2d4c2056447510b8491ad4c94c806b3830014e4 100644 (file)
@@ -113,88 +113,86 @@ void MgrStatMonitor::calc_pool_availability()
     return;
   }
 
-  // if reset_availability_last_uptime_downtime_val is not utime_t(1, 2), 
-  // update last_uptime and last_downtime for all pools to the 
-  // recorded values
-  if (reset_availability_last_uptime_downtime_val.has_value()) {
-    for (const auto& i : pool_availability) {
-      const auto& poolid = i.first;
-      pool_availability[poolid].last_downtime = reset_availability_last_uptime_downtime_val.value();
-      pool_availability[poolid].last_uptime = reset_availability_last_uptime_downtime_val.value();
+  // Add new pools from digest
+  for ([[maybe_unused]] const auto& [poolid, _] : digest.pool_pg_unavailable_map) {
+    if (!pool_availability.contains(poolid)) {
+      pool_availability.emplace(poolid, PoolAvailability{});
+      dout(20) << fmt::format("{}: Adding pool: {}",
+                              __func__, poolid) << dendl;
     }
-    dout(20) << __func__ << " reset last_uptime and last_downtime to " 
-             << reset_availability_last_uptime_downtime_val << dendl;
-    reset_availability_last_uptime_downtime_val.reset();
   }
 
-  auto pool_avail_end = pool_availability.end();
-  for (const auto& i : digest.pool_pg_unavailable_map) {
-    const auto& poolid = i.first;
-    if (pool_availability.find(poolid) == pool_avail_end){
-      // New Pool so we add.
-      pool_availability.insert({poolid, PoolAvailability()});
-      dout(20) << __func__ << "Adding pool: " << poolid << dendl;
+  // Remove unavailable pools
+  // A pool is removed if it meets either of the following criteria:
+  // 1. It is not present in the digest's pool_pg_unavailable_map.
+  // 2. It is not present in the osdmap (checked via mon.osdmon()->osdmap).
+  std::erase_if(pool_availability, [&](const auto& kv) {
+    const auto& poolid = kv.first;
+
+    if (!digest.pool_pg_unavailable_map.contains(poolid)) {
+      dout(20) << fmt::format("{}: Deleting pool (not in digest): {}",
+                              __func__, poolid) << dendl;
+      return true;
     }
-  }
-  utime_t now(ceph_clock_now());
-  auto pool_unavail_end = digest.pool_pg_unavailable_map.end();
-  for (const auto& i : pool_availability) {
-    const auto& poolid = i.first;
-    if (digest.pool_pg_unavailable_map.find(poolid) ==
-      pool_unavail_end) {
-      // delete none exist pool
-      pool_availability.erase(poolid);
-      dout(20) << __func__ << "Deleting pool: " << poolid << dendl;
-      continue;
-    }
-    if (mon.osdmon()->osdmap.have_pg_pool(poolid)){
-      // Currently, couldn't find an elegant way to get pool name
-      pool_availability[poolid].pool_name = mon.osdmon()->osdmap.get_pool_name(poolid);
-    } else {
-      pool_availability.erase(poolid);
-      dout(20) << __func__ << "pool: " 
-              << poolid << " no longer exists in osdmap! Deleting pool: " 
-         << poolid << dendl;
-      continue;
+    if (!mon.osdmon()->osdmap.have_pg_pool(poolid)) {
+      dout(20) << fmt::format("{}: Deleting pool (not in osdmap): {}",
+                              __func__, poolid) << dendl;
+      return true;
     }
-    if (pool_availability[poolid].is_avail) {
-      if (!digest.pool_pg_unavailable_map[poolid].empty()) {
-        // avail to unavail
-        dout(20) << __func__ 
-                << ": Pool " << poolid << " status: Available to Unavailable" << dendl;
-        pool_availability[poolid].is_avail = false;
-        pool_availability[poolid].num_failures += 1;
-        pool_availability[poolid].last_downtime = now;
-        pool_availability[poolid].uptime +=
-          now - pool_availability[poolid].last_uptime;
+    return false;
+  });
+
+  // Update pool availability
+  utime_t now(ceph_clock_now());
+  for (auto& [poolid, avail] : pool_availability) {
+    avail.pool_name = mon.osdmon()->osdmap.get_pool_name(poolid);
+
+    auto it = digest.pool_pg_unavailable_map.find(poolid);
+    const bool currently_avail = (it != digest.pool_pg_unavailable_map.end()) &&
+                                 it->second.empty();
+
+    if (avail.is_avail) {
+      if (!currently_avail) {            // Available → Unavailable
+        dout(20) << fmt::format("{}: Pool {} status: Available to Unavailable",
+                              __func__, poolid) << dendl;
+        avail.is_avail        = false;
+        ++avail.num_failures;
+        avail.last_downtime   = now;
+        avail.uptime         += now - avail.last_uptime;
       } else {
-        // avail to avail
-        dout(20) << __func__ 
-                << ": Pool " << poolid << " status: Available to Available" << dendl;
-        pool_availability[poolid].uptime +=
-          now - pool_availability[poolid].last_uptime;
-        pool_availability[poolid].last_uptime = now;
+        // Available to Available
+        dout(20) << fmt::format("{}: Pool {} status: Available to Available",
+                              __func__, poolid) << dendl;
+        avail.uptime         += now - avail.last_uptime;
+        avail.last_uptime     = now;
       }
-    } else {
-      if (!digest.pool_pg_unavailable_map[poolid].empty()) {
-        // unavail to unavail
-        dout(20) << __func__ 
-                << ": Pool " << poolid << " status: Unavailable to Unavailable" << dendl;
-        pool_availability[poolid].downtime +=
-          now - pool_availability[poolid].last_downtime;
-        pool_availability[poolid].last_downtime = now;
-      } else {
-        // unavail to avail
-        dout(20) << __func__ 
-                << ": Pool " << poolid << " status: Unavailable to Available" << dendl;
-        pool_availability[poolid].is_avail = true;
-        pool_availability[poolid].last_uptime = now;
-        pool_availability[poolid].uptime +=
-          now - pool_availability[poolid].last_downtime;
+    } else {                             // Unavailable
+      if (currently_avail) {             // Unavailable to Available
+        dout(20) << fmt::format("{}: Pool {} status: Unavailable to Available",
+                              __func__, poolid) << dendl;
+        avail.is_avail        = true;
+        avail.last_uptime     = now;
+        avail.uptime         += now - avail.last_downtime;
+      } else {                           // Unavailable to Unavailable
+        dout(20) << fmt::format("{}: Pool {} status: Unavailable to Unavailable",
+                              __func__, poolid) << dendl;
+        avail.downtime       += now - avail.last_downtime;
+        avail.last_downtime   = now;
       }
     }
+
+    // if reset_availability_last_uptime_downtime_val is not utime_t(1, 2), 
+    // update last_uptime and last_downtime for all pools to the 
+    // recorded values
+    if (reset_availability_last_uptime_downtime_val.has_value()) {
+      dout(20) << fmt::format("{}: Pool {} reset last_uptime and last_downtime to {}",
+                              __func__, poolid, reset_availability_last_uptime_downtime_val.value()) << dendl;
+      avail.last_downtime = reset_availability_last_uptime_downtime_val.value();
+      avail.last_uptime = reset_availability_last_uptime_downtime_val.value();
+    }
   }
   pending_pool_availability.swap(pool_availability);
+  reset_availability_last_uptime_downtime_val.reset();
 }
 
 void MgrStatMonitor::update_from_paxos(bool *need_bootstrap)