git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/osd: introduce osdmap trimming
author Matan Breizman <mbreizma@redhat.com>
Thu, 9 Nov 2023 12:44:42 +0000 (12:44 +0000)
committer Matan Breizman <mbreizma@redhat.com>
Wed, 29 Nov 2023 09:23:41 +0000 (09:23 +0000)
Signed-off-by: Matan Breizman <mbreizma@redhat.com>
src/crimson/osd/osd.cc
src/crimson/osd/pg_shard_manager.h
src/crimson/osd/shard_services.cc
src/crimson/osd/shard_services.h

index 8da2d566e6b50aac560e3ff28eae06bcf4615af3..53004f387b8862da6992524ea5302e61f821907f 100644 (file)
@@ -400,6 +400,9 @@ seastar::future<> OSD::start()
     );
   }).then([this](OSDSuperblock&& sb) {
     superblock = std::move(sb);
+    if (!superblock.cluster_osdmap_trim_lower_bound) {
+      superblock.cluster_osdmap_trim_lower_bound = superblock.get_oldest_map();
+    }
     pg_shard_manager.set_superblock(superblock);
     return pg_shard_manager.get_local_map(superblock.current_epoch);
   }).then([this](OSDMapService::local_cached_map_t&& map) {
@@ -934,6 +937,16 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
   logger().info("handle_osd_map epochs [{}..{}], i have {}, src has [{}..{}]",
                 first, last, superblock.get_newest_map(),
                 m->cluster_osdmap_trim_lower_bound, m->newest_map);
+
+  if (superblock.cluster_osdmap_trim_lower_bound <
+      m->cluster_osdmap_trim_lower_bound) {
+    superblock.cluster_osdmap_trim_lower_bound =
+      m->cluster_osdmap_trim_lower_bound;
+    logger().debug("{} superblock cluster_osdmap_trim_lower_bound new epoch is: {}",
+                   __func__, superblock.cluster_osdmap_trim_lower_bound);
+    ceph_assert(
+      superblock.cluster_osdmap_trim_lower_bound >= superblock.get_oldest_map());
+  }
   // make sure there is something new, here, before we bother flushing
   // the queues and such
   if (last <= superblock.get_newest_map()) {
@@ -964,8 +977,7 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
       monc->sub_got("osdmap", last);
 
       if (!superblock.maps.empty()) {
-        // TODO: support osdmap trimming
-        // See: <tracker>
+        pg_shard_manager.trim_maps(t, superblock);
       }
 
       superblock.insert_osdmap_epochs(first, last);
index cf13cb52bbf70c400ebbbad59e6641f6c4135216..bb74c18e37143c829bcb7e367548f79c48fc2968 100644 (file)
@@ -136,6 +136,7 @@ public:
   FORWARD_TO_OSD_SINGLETON(load_map_bl)
   FORWARD_TO_OSD_SINGLETON(load_map_bls)
   FORWARD_TO_OSD_SINGLETON(store_maps)
+  FORWARD_TO_OSD_SINGLETON(trim_maps)
 
   seastar::future<> set_up_epoch(epoch_t e);
 
index c0688f9576996820ed9330c4053addc0ba2878ec..faa224f99eaebd045d32abc1cecfdb208352cc42 100644 (file)
@@ -457,6 +457,34 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t,
     });
 }
 
+// Trim old osdmap epochs: remove them via `t` and erase them from the caller's
+// `superblock`, which is stored/set only later in OSD::handle_osd_map.
+void OSDSingletonState::trim_maps(ceph::os::Transaction& t,
+                                  OSDSuperblock& superblock)
+{
+  epoch_t min =
+    std::min(superblock.cluster_osdmap_trim_lower_bound,
+             osdmaps.cached_key_lower_bound());
+
+  if (min <= superblock.get_oldest_map()) {
+    return;
+  }
+  logger().debug("{}: min={} oldest_map={}", __func__, min,  superblock.get_oldest_map());
+
+  // Trim from the superblock's oldest_map up to, but not including, `min`.
+  // Stop early once `t` has reached osd_target_transaction_size ops.
+  while (superblock.get_oldest_map() < min &&
+         t.get_num_ops() < crimson::common::local_conf()->osd_target_transaction_size) {
+    logger().debug("{}: removing old osdmap epoch {}", __func__, superblock.get_oldest_map());
+    meta_coll->remove_map(t, superblock.get_oldest_map());
+    superblock.maps.erase(superblock.get_oldest_map());  // presumably advances get_oldest_map()
+  }
+
+  // `min` was capped at osdmaps.cached_key_lower_bound() above; we must not trim
+  // past that bound as there may still be PGs with those map epochs recorded.
+  ceph_assert(min <= osdmaps.cached_key_lower_bound());
+}
+
 seastar::future<Ref<PG>> ShardServices::make_pg(
   OSDMapService::cached_map_t create_map,
   spg_t pgid,
@@ -715,30 +743,34 @@ seastar::future<> OSDSingletonState::send_incremental_map(
                 "superblock's oldest map: {}",
                 __func__, first, superblock.get_oldest_map());
   if (first >= superblock.get_oldest_map()) {
+    if (first < superblock.cluster_osdmap_trim_lower_bound) {
+      logger().info("{}: cluster osdmap lower bound: {} "
+                " > first {}, starting with full map",
+                __func__, superblock.cluster_osdmap_trim_lower_bound, first);
+      // we don't have the next map the target wants,
+      // so start with a full map.
+      first = superblock.cluster_osdmap_trim_lower_bound;
+    }
     return load_map_bls(
       first, superblock.get_newest_map()
-    ).then([this, &conn, first](auto&& bls) {
+    ).then([this, &conn](auto&& bls) {
       auto m = crimson::make_message<MOSDMap>(
        monc.get_fsid(),
        osdmap->get_encoding_features());
-      m->cluster_osdmap_trim_lower_bound = first;
+      m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound;
       m->newest_map = superblock.get_newest_map();
       m->maps = std::move(bls);
       return conn.send(std::move(m));
     });
   } else {
+    // See OSDService::send_incremental_map
+    // just send latest full map
     return load_map_bl(osdmap->get_epoch()
     ).then([this, &conn](auto&& bl) mutable {
       auto m = crimson::make_message<MOSDMap>(
        monc.get_fsid(),
        osdmap->get_encoding_features());
-      /* TODO: once we support the tracking of superblock's
-       *       cluster_osdmap_trim_lower_bound, the MOSDMap should
-       *       be populated with this value instead of the oldest_map.
-       *       See: OSD::handle_osd_map for how classic updates the
-       *       cluster's trim lower bound.
-       */
-      m->cluster_osdmap_trim_lower_bound = superblock.get_oldest_map();
+      m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound;
       m->newest_map = superblock.get_newest_map();
       m->maps.emplace(osdmap->get_epoch(), std::move(bl));
       return conn.send(std::move(m));
index d71513a6645efed5b6aeaa29d3194017856cd6ce..50a4bc633135928dfb4cbee3fc4b2362c7971740 100644 (file)
@@ -316,6 +316,7 @@ private:
                     epoch_t e, bufferlist&& bl);
   seastar::future<> store_maps(ceph::os::Transaction& t,
                                epoch_t start, Ref<MOSDMap> m);
+  void trim_maps(ceph::os::Transaction& t, OSDSuperblock& superblock);
 };
 
 /**