]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd/OSD: fix track_pools_and_pg_num_changes on mapgaps
authorMatan Breizman <mbreizma@redhat.com>
Thu, 21 Dec 2023 10:25:46 +0000 (10:25 +0000)
committerMatan Breizman <mbreizma@redhat.com>
Sun, 31 Mar 2024 12:08:36 +0000 (12:08 +0000)
* track_pools_and_pg_num_changes call is moved before the superblock updates to
  know if we have any OSDMaps *before* trimming them (if possible).
  This is used to identify map gaps.

* track_pools_and_pg_num_changes inner loop is moved into
  _track_pools_and_pg_num_changes.

* lastmap now starts with the newest_map we have, even on mapgap.
  Previously, on mapgaps, we would (falsly) assume that this is the first start
  of this OSD and would ignore the state before the gap. This case
  is fully described in the tracker below:

Fixes: https://tracker.ceph.com/issues/63881
Signed-off-by: Matan Breizman <mbreizma@redhat.com>
src/osd/OSD.cc
src/osd/OSD.h

index 35e7dee45c24737e9470913a3a84f7f9586139bb..49c2a37630ef2b612b116f36477c281ed6709ba6 100644 (file)
@@ -8245,13 +8245,18 @@ void OSD::handle_osd_map(MOSDMap *m)
     rerequest_full_maps();
   }
 
+  track_pools_and_pg_num_changes(added_maps, t);
+
   if (!superblock.maps.empty()) {
     trim_maps(m->cluster_osdmap_trim_lower_bound);
     pg_num_history.prune(superblock.get_oldest_map());
   }
   superblock.insert_osdmap_epochs(first, last);
   if (superblock.maps.num_intervals() > 1) {
-    dout(10) << __func__ << " osd map gap " << superblock.maps << dendl;
+    // we had a map gap and not yet trimmed all the way up to
+    // cluster_osdmap_trim_lower_bound
+    dout(10) << __func__ << " osd maps are not contiguous"
+             << superblock.maps << dendl;
   }
   superblock.current_epoch = last;
 
@@ -8262,8 +8267,6 @@ void OSD::handle_osd_map(MOSDMap *m)
     superblock.clean_thru = last;
   }
 
-  track_pools_and_pg_num_changes(added_maps, t, last);
-
   {
     bufferlist bl;
     ::encode(pg_num_history, bl);
@@ -8308,65 +8311,98 @@ void OSD::handle_osd_map(MOSDMap *m)
 }
 
 /*
- *  For each added map which was sent in this MOSDMap [first, last]:
- *
- *  1) Check if a pool was deleted
- *  2) For existing pools, check if pg_num was changed
- *  3) Check if a pool was created
- *
- *  Track all of the changes above in pg_num_history
+ *  Compare between the previous last_map we had to
+ *  each one of the added_maps.
+ *  Track all of the changes relevant in pg_num_history.
  */
-void OSD::track_pools_and_pg_num_changes(const map<epoch_t,OSDMapRef>& added_maps,
-                                         ObjectStore::Transaction& t,
-                                         epoch_t last)
+void OSD::track_pools_and_pg_num_changes(
+  const map<epoch_t,OSDMapRef>& added_maps,
+  ObjectStore::Transaction& t)
 {
+  epoch_t first = added_maps.begin()->first;
+  epoch_t last = added_maps.rbegin()->first;
+
+  // Unless this is the first start of this OSD,
+  // lastmap should be the newest_map we have.
   OSDMapRef lastmap;
-  for (auto& [added_map_epoch, added_map] : added_maps) {
-    if (!lastmap) {
-      if (!(lastmap = service.try_get_map(added_map_epoch - 1))) {
-        dout(10) << __func__ << " can't get previous map " << added_map_epoch - 1
-                 << " probably first start of this osd" << dendl;
-        continue;
-      }
+
+  if (superblock.maps.empty()) {
+    dout(10) << __func__ << " no maps stored, this is probably "
+             << "the first start of this osd" << dendl;
+    lastmap = added_maps.at(first);
+  } else {
+    if (first > superblock.get_newest_map() + 1) {
+      ceph_assert(first == superblock.cluster_osdmap_trim_lower_bound);
+      dout(20) << __func__ << " can't get previous map "
+               << superblock.get_newest_map()
+               << " first start of this osd after a map gap" << dendl;
+    }
+    if (!(lastmap =
+          service.try_get_map(superblock.get_newest_map()))) {
+      // This is unexpected
+      ceph_abort();
     }
-    ceph_assert(lastmap->get_epoch() + 1 == added_map->get_epoch());
-    for (auto& [pool_id, pg_pool] : lastmap->get_pools()) {
-      if (!added_map->have_pg_pool(pool_id)) {
-       pg_num_history.log_pool_delete(added_map_epoch, pool_id);
-       dout(10) << __func__ << " recording final pg_pool_t for pool "
-                << pool_id << dendl;
-       // this information is needed by _make_pg() if have to restart before
-       // the pool is deleted and need to instantiate a new (zombie) PG[Pool].
-       ghobject_t obj = make_final_pool_info_oid(pool_id);
-       bufferlist bl;
-       encode(pg_pool, bl, CEPH_FEATURES_ALL);
-       string name = lastmap->get_pool_name(pool_id);
-       encode(name, bl);
-       map<string,string> profile;
-       if (lastmap->get_pg_pool(pool_id)->is_erasure()) {
-         profile = lastmap->get_erasure_code_profile(
-           lastmap->get_pg_pool(pool_id)->erasure_code_profile);
-       }
-       encode(profile, bl);
-       t.write(coll_t::meta(), obj, 0, bl.length(), bl);
-      } else if (unsigned new_pg_num = added_map->get_pg_num(pool_id);
-                new_pg_num != pg_pool.get_pg_num()) {
-       dout(10) << __func__ << " recording pool " << pool_id << " pg_num "
-                << pg_pool.get_pg_num() << " -> " << new_pg_num << dendl;
-       pg_num_history.log_pg_num_change(added_map_epoch, pool_id, new_pg_num);
+  }
+
+  // For each added map, record any changes into pg_num_history
+  // and update lastmap afterwards.
+  for (auto& [current_added_map_epoch, current_added_map] : added_maps) {
+    _track_pools_and_pg_num_changes(t, lastmap,
+                                    current_added_map,
+                                    current_added_map_epoch);
+    lastmap = current_added_map;
+  }
+  pg_num_history.epoch = last;
+}
+
+void OSD::_track_pools_and_pg_num_changes(
+  ObjectStore::Transaction& t,
+  const OSDMapRef& lastmap,
+  const OSDMapRef& current_added_map,
+  epoch_t current_added_map_epoch)
+{
+  // 1) Check if a pool was deleted
+  for (auto& [pool_id, pg_pool] : lastmap->get_pools()) {
+    if (!current_added_map->have_pg_pool(pool_id)) {
+      pg_num_history.log_pool_delete(current_added_map_epoch, pool_id);
+      dout(10) << __func__ << " recording final pg_pool_t for pool "
+               << pool_id << dendl;
+      // this information is needed by _make_pg() if have to restart before
+      // the pool is deleted and need to instantiate a new (zombie) PG[Pool].
+      ghobject_t obj = make_final_pool_info_oid(pool_id);
+      bufferlist bl;
+      encode(pg_pool, bl, CEPH_FEATURES_ALL);
+      string name = lastmap->get_pool_name(pool_id);
+      encode(name, bl);
+      map<string,string> profile;
+      if (lastmap->get_pg_pool(pool_id)->is_erasure()) {
+        profile = lastmap->get_erasure_code_profile(
+        lastmap->get_pg_pool(pool_id)->erasure_code_profile);
       }
+      encode(profile, bl);
+      t.write(coll_t::meta(), obj, 0, bl.length(), bl);
+
+    // 2) For existing pools, check if pg_num was changed
+    } else if (unsigned new_pg_num = current_added_map->get_pg_num(pool_id);
+               new_pg_num != pg_pool.get_pg_num()) {
+      dout(10) << __func__ << " recording pool " << pool_id << " pg_num "
+               << pg_pool.get_pg_num() << " -> " << new_pg_num << dendl;
+      pg_num_history.log_pg_num_change(current_added_map_epoch,
+                                       pool_id,
+                                       new_pg_num);
     }
-    for (auto& [pool_id, pg_pool] : added_map->get_pools()) {
-      if (!lastmap->have_pg_pool(pool_id)) {
-       dout(10) << __func__ << " recording new pool " <<pool_id << " pg_num "
-                << pg_pool.get_pg_num() << dendl;
-       pg_num_history.log_pg_num_change(added_map_epoch, pool_id,
-                                        pg_pool.get_pg_num());
-      }
+  }
+
+  // 3) Check if a pool was created
+  for (auto& [pool_id, pg_pool] : current_added_map->get_pools()) {
+    if (!lastmap->have_pg_pool(pool_id)) {
+        dout(10) << __func__ << " recording new pool " <<pool_id << " pg_num "
+                 << pg_pool.get_pg_num() << dendl;
+    pg_num_history.log_pg_num_change(current_added_map_epoch,
+                                     pool_id,
+                                     pg_pool.get_pg_num());
     }
-    lastmap = added_map;
   }
-  pg_num_history.epoch = last;
 }
 
 void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m)
index cd479dfe9ea4d1d742907ac95525b2d081fc7b6c..e5f3510e2a5239b14d5e6e5031f963b74def7901 100644 (file)
@@ -1675,8 +1675,11 @@ protected:
 
   void handle_osd_map(class MOSDMap *m);
   void track_pools_and_pg_num_changes(const std::map<epoch_t,OSDMapRef>& added_maps,
-                                      ObjectStore::Transaction& t,
-                                      epoch_t last);
+                                      ObjectStore::Transaction& t);
+  void _track_pools_and_pg_num_changes(ObjectStore::Transaction& t,
+                                       const OSDMapRef& lastmap,
+                                       const OSDMapRef& current_added_map,
+                                       epoch_t current_added_map_epoch);
   void _committed_osd_maps(epoch_t first, epoch_t last, class MOSDMap *m);
   void trim_maps(epoch_t oldest);
   void note_down_osd(int osd);