From: Matan Breizman Date: Thu, 9 Nov 2023 12:44:42 +0000 (+0000) Subject: crimson/osd: introduce osdmap trimming X-Git-Tag: v19.0.0~4^2~5 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=1c3629d6593da326ff97002f680133137d3cab01;p=ceph-ci.git crimson/osd: introduce osdmap trimming Signed-off-by: Matan Breizman --- diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 8da2d566e6b..53004f387b8 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -400,6 +400,9 @@ seastar::future<> OSD::start() ); }).then([this](OSDSuperblock&& sb) { superblock = std::move(sb); + if (!superblock.cluster_osdmap_trim_lower_bound) { + superblock.cluster_osdmap_trim_lower_bound = superblock.get_oldest_map(); + } pg_shard_manager.set_superblock(superblock); return pg_shard_manager.get_local_map(superblock.current_epoch); }).then([this](OSDMapService::local_cached_map_t&& map) { @@ -934,6 +937,16 @@ seastar::future<> OSD::_handle_osd_map(Ref m) logger().info("handle_osd_map epochs [{}..{}], i have {}, src has [{}..{}]", first, last, superblock.get_newest_map(), m->cluster_osdmap_trim_lower_bound, m->newest_map); + + if (superblock.cluster_osdmap_trim_lower_bound < + m->cluster_osdmap_trim_lower_bound) { + superblock.cluster_osdmap_trim_lower_bound = + m->cluster_osdmap_trim_lower_bound; + logger().debug("{} superblock cluster_osdmap_trim_lower_bound new epoch is: {}", + __func__, superblock.cluster_osdmap_trim_lower_bound); + ceph_assert( + superblock.cluster_osdmap_trim_lower_bound >= superblock.get_oldest_map()); + } // make sure there is something new, here, before we bother flushing // the queues and such if (last <= superblock.get_newest_map()) { @@ -964,8 +977,7 @@ seastar::future<> OSD::_handle_osd_map(Ref m) monc->sub_got("osdmap", last); if (!superblock.maps.empty()) { - // TODO: support osdmap trimming - // See: + pg_shard_manager.trim_maps(t, superblock); } superblock.insert_osdmap_epochs(first, last); diff --git a/src/crimson/osd/pg_shard_manager.h b/src/crimson/osd/pg_shard_manager.h index cf13cb52bbf..bb74c18e371 100644 --- a/src/crimson/osd/pg_shard_manager.h +++ b/src/crimson/osd/pg_shard_manager.h @@ -136,6 +136,7 @@ public: FORWARD_TO_OSD_SINGLETON(load_map_bl) FORWARD_TO_OSD_SINGLETON(load_map_bls) FORWARD_TO_OSD_SINGLETON(store_maps) + FORWARD_TO_OSD_SINGLETON(trim_maps) seastar::future<> set_up_epoch(epoch_t e); diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc index c0688f95769..faa224f99ea 100644 --- a/src/crimson/osd/shard_services.cc +++ b/src/crimson/osd/shard_services.cc @@ -457,6 +457,34 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t, }); } +// Note: store/set_superblock is called in later OSD::handle_osd_map +// so we use the OSD's superblock reference meanwhile. +void OSDSingletonState::trim_maps(ceph::os::Transaction& t, + OSDSuperblock& superblock) +{ + epoch_t min = + std::min(superblock.cluster_osdmap_trim_lower_bound, + osdmaps.cached_key_lower_bound()); + + if (min <= superblock.get_oldest_map()) { + return; + } + logger().debug("{}: min={} oldest_map={}", __func__, min, superblock.get_oldest_map()); + + // Trim from the superblock's oldest_map up to `min`. + // Break if we have exceeded the txn target size. + while (superblock.get_oldest_map() < min && + t.get_num_ops() < crimson::common::local_conf()->osd_target_transaction_size) { + logger().debug("{}: removing old osdmap epoch {}", __func__, superblock.get_oldest_map()); + meta_coll->remove_map(t, superblock.get_oldest_map()); + superblock.maps.erase(superblock.get_oldest_map()); + } + + // we should not trim past osdmaps.cached_key_lower_bound() + // as there may still be PGs with those map epochs recorded. + ceph_assert(min <= osdmaps.cached_key_lower_bound()); +} + seastar::future> ShardServices::make_pg( OSDMapService::cached_map_t create_map, spg_t pgid, @@ -715,30 +743,34 @@ seastar::future<> OSDSingletonState::send_incremental_map( "superblock's oldest map: {}", __func__, first, superblock.get_oldest_map()); if (first >= superblock.get_oldest_map()) { + if (first < superblock.cluster_osdmap_trim_lower_bound) { + logger().info("{}: cluster osdmap lower bound: {} " + " > first {}, starting with full map", + __func__, superblock.cluster_osdmap_trim_lower_bound, first); + // we don't have the next map the target wants, + // so start with a full map. + first = superblock.cluster_osdmap_trim_lower_bound; + } return load_map_bls( first, superblock.get_newest_map() - ).then([this, &conn, first](auto&& bls) { + ).then([this, &conn](auto&& bls) { auto m = crimson::make_message( monc.get_fsid(), osdmap->get_encoding_features()); - m->cluster_osdmap_trim_lower_bound = first; + m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound; m->newest_map = superblock.get_newest_map(); m->maps = std::move(bls); return conn.send(std::move(m)); }); } else { + // See OSDService::send_incremental_map + // just send latest full map return load_map_bl(osdmap->get_epoch() ).then([this, &conn](auto&& bl) mutable { auto m = crimson::make_message( monc.get_fsid(), osdmap->get_encoding_features()); - /* TODO: once we support the tracking of superblock's - * cluster_osdmap_trim_lower_bound, the MOSDMap should - * be populated with this value instead of the oldest_map. - * See: OSD::handle_osd_map for how classic updates the - * cluster's trim lower bound. - */ - m->cluster_osdmap_trim_lower_bound = superblock.get_oldest_map(); + m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound; m->newest_map = superblock.get_newest_map(); m->maps.emplace(osdmap->get_epoch(), std::move(bl)); return conn.send(std::move(m)); diff --git a/src/crimson/osd/shard_services.h b/src/crimson/osd/shard_services.h index d71513a6645..50a4bc63313 100644 --- a/src/crimson/osd/shard_services.h +++ b/src/crimson/osd/shard_services.h @@ -316,6 +316,7 @@ private: epoch_t e, bufferlist&& bl); seastar::future<> store_maps(ceph::os::Transaction& t, epoch_t start, Ref m); + void trim_maps(ceph::os::Transaction& t, OSDSuperblock& superblock); }; /**