]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/OSDMonitor: account for PG merging in epoch_by_pg accounting 42136/head
authorDan van der Ster <daniel.vanderster@cern.ch>
Thu, 1 Jul 2021 14:12:26 +0000 (16:12 +0200)
committerDan van der Ster <daniel.vanderster@cern.ch>
Wed, 7 Jul 2021 11:24:00 +0000 (13:24 +0200)
After a pool has merged PGs, the epoch_by_pg accounting will refer
to osdmap epochs of PGs that no longer exist. We'll never again get
OSD beacons for these PGs, so the min epoch in epoch_by_pg will not
advance until the mon leader has restarted. The effect of this is
that osdmaps are not trimmed after a pool has undergone PG merging,
until the mon leader restarts. To fix, we unconditionally resize
epoch_by_pg to the pg_num of the pool during each beacon report.

Fixes: https://tracker.ceph.com/issues/48212
Signed-off-by: Dan van der Ster <daniel.vanderster@cern.ch>
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h

index db8e9990e91a372be089126bf322aba3c477160c..590ff5b5d1d89a90fee6b8f6ecf31178f39fcec9 100644 (file)
@@ -341,11 +341,14 @@ bool is_unmanaged_snap_op_permitted(CephContext* cct,
 
 } // anonymous namespace
 
-void LastEpochClean::Lec::report(ps_t ps, epoch_t last_epoch_clean)
+void LastEpochClean::Lec::report(unsigned pg_num, ps_t ps,
+                                epoch_t last_epoch_clean)
 {
-  if (epoch_by_pg.size() <= ps) {
-    epoch_by_pg.resize(ps + 1, 0);
+  if (ps >= pg_num) {
+    // removed PG
+    return;
   }
+  epoch_by_pg.resize(pg_num, 0);
   const auto old_lec = epoch_by_pg[ps];
   if (old_lec >= last_epoch_clean) {
     // stale lec
@@ -377,10 +380,11 @@ void LastEpochClean::remove_pool(uint64_t pool)
   report_by_pool.erase(pool);
 }
 
-void LastEpochClean::report(const pg_t& pg, epoch_t last_epoch_clean)
+void LastEpochClean::report(unsigned pg_num, const pg_t& pg,
+                           epoch_t last_epoch_clean)
 {
   auto& lec = report_by_pool[pg.pool()];
-  return lec.report(pg.ps(), last_epoch_clean);
+  return lec.report(pg_num, pg.ps(), last_epoch_clean);
 }
 
 epoch_t LastEpochClean::get_lower_bound(const OSDMap& latest) const
@@ -4385,7 +4389,10 @@ bool OSDMonitor::prepare_beacon(MonOpRequestRef op)
   osd_epochs[from] = beacon->version;
 
   for (const auto& pg : beacon->pgs) {
-    last_epoch_clean.report(pg, beacon->min_last_epoch_clean);
+    if (auto* pool = osdmap.get_pg_pool(pg.pool()); pool != nullptr) {
+      unsigned pg_num = pool->get_pg_num();
+      last_epoch_clean.report(pg_num, pg, beacon->min_last_epoch_clean);
+    }
   }
 
   if (osdmap.osd_xinfo[from].last_purged_snaps_scrub <
index 599fbc1ac4f22e7be61717a179e6cf2ab419dfe1..0ecdcbb31f10f1f9a4df211592ce62c6505b70d7 100644 (file)
@@ -107,11 +107,11 @@ class LastEpochClean {
     std::vector<epoch_t> epoch_by_pg;
     ps_t next_missing = 0;
     epoch_t floor = std::numeric_limits<epoch_t>::max();
-    void report(ps_t pg, epoch_t last_epoch_clean);
+    void report(unsigned pg_num, ps_t pg, epoch_t last_epoch_clean);
   };
   std::map<uint64_t, Lec> report_by_pool;
 public:
-  void report(const pg_t& pg, epoch_t last_epoch_clean);
+  void report(unsigned pg_num, const pg_t& pg, epoch_t last_epoch_clean);
   void remove_pool(uint64_t pool);
   epoch_t get_lower_bound(const OSDMap& latest) const;