From 8cf896e382e5416dad15592de1cef23316f23fb8 Mon Sep 17 00:00:00 2001 From: Mohit Agrawal Date: Tue, 22 Apr 2025 20:42:01 +0530 Subject: [PATCH] osd: Access/Modify epoch maps under mutex in OSDSuperblock class The OSDSuperblock object access/modify epoch maps in multiple threads simultaneously due to that OSD is getting crashed. To avoid the crash access the maps under mutex. Fixes: https://tracker.ceph.com/issues/66819 Signed-off-by: Mohit Agrawal --- src/crimson/osd/osd.cc | 6 +- src/crimson/osd/shard_services.cc | 2 +- src/osd/OSD.cc | 14 ++-- src/osd/osd_types.cc | 19 ++++- src/osd/osd_types.h | 134 +++++++++++++++++++++++++++--- 5 files changed, 148 insertions(+), 27 deletions(-) diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 27511df85822c..9266cc0b1cf1f 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -870,7 +870,7 @@ void OSD::dump_status(Formatter* f) const f->dump_stream("osd_fsid") << superblock.osd_fsid; f->dump_unsigned("whoami", superblock.whoami); f->dump_string("state", pg_shard_manager.get_osd_state_string()); - f->dump_stream("maps") << superblock.maps; + f->dump_stream("maps") << superblock.get_maps(); f->dump_stream("oldest_map") << superblock.get_oldest_map(); f->dump_stream("newest_map") << superblock.get_newest_map(); f->dump_unsigned("cluster_osdmap_trim_lower_bound", @@ -881,7 +881,7 @@ void OSD::dump_status(Formatter* f) const void OSD::print(std::ostream& out) const { out << "{osd." << superblock.whoami << " " - << superblock.osd_fsid << " maps " << superblock.maps + << superblock.osd_fsid << " maps " << superblock.get_maps() << " tlb:" << superblock.cluster_osdmap_trim_lower_bound << " pgs:" << pg_shard_manager.get_num_pgs() << "}"; @@ -1194,7 +1194,7 @@ seastar::future<> OSD::_handle_osd_map(Ref m) // even if this map isn't from a mon, we may have satisfied our subscription monc->sub_got("osdmap", last); - if (!superblock.maps.empty()) { + if (!superblock.is_maps_empty()) { pg_shard_manager.trim_maps(t, superblock); // TODO: once we support pg splitting, update pg_num_history here //pg_num_history.prune(superblock.get_oldest_map()); diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc index 517cf6580bd34..77ed7084994d9 100644 --- a/src/crimson/osd/shard_services.cc +++ b/src/crimson/osd/shard_services.cc @@ -547,7 +547,7 @@ void OSDSingletonState::trim_maps(ceph::os::Transaction& t, DEBUG("removing old osdmap epoch {}", superblock.get_oldest_map()); meta_coll->remove_map(t, superblock.get_oldest_map()); meta_coll->remove_inc_map(t, superblock.get_oldest_map()); - superblock.maps.erase(superblock.get_oldest_map()); + superblock.erase_oldest_maps(); } // we should not trim past osdmaps.cached_key_lower_bound() diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 2ea6d25f44293..9b87f6474cf11 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2793,7 +2793,7 @@ void OSD::asok_command( f->dump_stream("osd_fsid") << superblock.osd_fsid; f->dump_unsigned("whoami", superblock.whoami); f->dump_string("state", get_state_name(get_state())); - f->dump_stream("maps") << superblock.maps; + f->dump_stream("maps") << superblock.get_maps(); f->dump_stream("oldest_map") << superblock.get_oldest_map(); f->dump_stream("newest_map") << superblock.get_newest_map(); f->dump_unsigned("cluster_osdmap_trim_lower_bound", @@ -6908,7 +6908,7 @@ void OSD::start_boot() } dout(1) << __func__ << dendl; set_state(STATE_PREBOOT); - dout(10) << "start_boot - have maps " << superblock.maps << dendl; + dout(10) << "start_boot - have maps " << superblock.get_maps() << dendl; monc->get_version("osdmap", CB_OSD_GetVersion(this)); } @@ -8106,7 +8106,7 @@ void OSD::trim_maps(epoch_t oldest) dout(20) << " removing old osdmap epoch " << superblock.get_oldest_map() << dendl; t.remove(coll_t::meta(), get_osdmap_pobject_name(superblock.get_oldest_map())); t.remove(coll_t::meta(), get_inc_osdmap_pobject_name(superblock.get_oldest_map())); - superblock.maps.erase(superblock.get_oldest_map()); + superblock.erase_oldest_maps(); } service.publish_superblock(superblock); @@ -8407,16 +8407,16 @@ void OSD::handle_osd_map(MOSDMap *m) track_pools_and_pg_num_changes(added_maps, t); - if (!superblock.maps.empty()) { + if (!superblock.is_maps_empty()) { trim_maps(m->cluster_osdmap_trim_lower_bound); pg_num_history.prune(superblock.get_oldest_map()); } superblock.insert_osdmap_epochs(first, last); - if (superblock.maps.num_intervals() > 1) { + if (superblock.get_maps_num_intervals() > 1) { // we had a map gap and not yet trimmed all the way up to // cluster_osdmap_trim_lower_bound dout(10) << __func__ << " osd maps are not contiguous" - << superblock.maps << dendl; + << superblock.get_maps() << dendl; } superblock.current_epoch = last; @@ -8486,7 +8486,7 @@ void OSD::track_pools_and_pg_num_changes( // lastmap should be the newest_map we have. OSDMapRef lastmap; - if (superblock.maps.empty()) { + if (superblock.is_maps_empty()) { dout(10) << __func__ << " no maps stored, this is probably " << "the first start of this osd" << dendl; lastmap = added_maps.at(first); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 0301975cae363..d5e6f9c61cab3 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -5822,6 +5822,19 @@ void pg_hit_set_history_t::generate_test_instances(list& ls.back()->history.push_back(pg_hit_set_info_t()); } +// -- GuardedMap -- +void OSDSuperblock::GuardedMap::encode(ceph::buffer::list &bl) const +{ + std::lock_guard lock(map_lock); + ::encode(maps, bl); +} + +void OSDSuperblock::GuardedMap::decode(ceph::buffer::list::const_iterator &bl) +{ + std::lock_guard lock(map_lock); + ::decode(maps, bl); +} + // -- OSDSuperblock -- void OSDSuperblock::encode(ceph::buffer::list &bl) const @@ -5842,7 +5855,7 @@ void OSDSuperblock::encode(ceph::buffer::list &bl) const encode(purged_snaps_last, bl); encode(last_purged_snaps_scrub, bl); encode(cluster_osdmap_trim_lower_bound, bl); - encode(maps, bl); + mapc.encode(bl); ENCODE_FINISH(bl); } @@ -5889,7 +5902,7 @@ void OSDSuperblock::decode(ceph::buffer::list::const_iterator &bl) cluster_osdmap_trim_lower_bound = 0; } if (struct_v >= 11) { - decode(maps, bl); + mapc.decode(bl); } else { insert_osdmap_epochs(oldest_map, newest_map); } @@ -5912,7 +5925,7 @@ void OSDSuperblock::dump(Formatter *f) const f->dump_stream("last_purged_snaps_scrub") << last_purged_snaps_scrub; f->dump_int("cluster_osdmap_trim_lower_bound", cluster_osdmap_trim_lower_bound); - f->dump_stream("maps") << maps; + f->dump_stream("maps") << get_maps(); } void OSDSuperblock::generate_test_instances(list& o) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index be34ad0f717b7..27f4d2eded938 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -49,6 +49,7 @@ #include "common/Formatter.h" #include "common/hobject.h" #include "common/snap_types.h" +#include "common/ceph_mutex.h" #include "common/strtol.h" // for ritoa() #include "HitSet.h" #include "librados/ListObjectImpl.h" @@ -5662,33 +5663,133 @@ inline std::ostream& operator<<(std::ostream& out, const ObjectExtent &ex) // --------------------------------------- class OSDSuperblock { -public: - uuid_d cluster_fsid, osd_fsid; - int32_t whoami = -1; // my role in this fs. - epoch_t current_epoch = 0; // most recent epoch - interval_set maps; // oldest/newest maps we have. +private: + class GuardedMap { +private: + mutable ceph::mutex map_lock = ceph::make_mutex("map_lock"); + interval_set maps; - epoch_t get_oldest_map() const { + epoch_t calc_newest_map() const { if (!maps.empty()) { - return maps.range_start(); + // maps stores [oldest_map, newest_map) (exclusive) + return maps.range_end() - 1; } return 0; } - - epoch_t get_newest_map() const { + epoch_t calc_oldest_map() const { if (!maps.empty()) { - // maps stores [oldest_map, newest_map) (exclusive) - return maps.range_end() - 1; + return maps.range_start(); } return 0; } +public: + GuardedMap() = default; + GuardedMap(const GuardedMap& other) + : map_lock(ceph::make_mutex("map_lock")) + { + std::lock_guard l(other.map_lock); // Lock before copying shared state + maps = other.maps; + } + + GuardedMap& operator=(const GuardedMap& other) { + if (this != &other) { + std::scoped_lock l(map_lock, other.map_lock); + maps = other.maps; + } + return *this; + } + + GuardedMap(GuardedMap&& other) + :map_lock(ceph::make_mutex("map_lock")) + { + std::lock_guard l(other.map_lock); + maps = std::move(other.maps); + } + + GuardedMap& operator=(GuardedMap&& other) noexcept { + if (this != &other) { + std::scoped_lock l(map_lock, other.map_lock); + maps = std::move(other.maps); + } + return *this; + } + + bool is_maps_empty() const { + std::lock_guard lock(map_lock); + return maps.empty(); + } + + void erase_oldest_maps() { + std::lock_guard lock(map_lock); + maps.erase(calc_oldest_map()); + } + + interval_set::size_type get_maps_num_intervals() const { + std::lock_guard lock(map_lock); + return maps.num_intervals(); + } + + interval_set get_maps() const { + std::lock_guard lock(map_lock); + return maps; + } + + epoch_t get_oldest_map() const { + std::lock_guard lock(map_lock); + return calc_oldest_map(); + } + + epoch_t get_newest_map() const { + std::lock_guard lock(map_lock); + return calc_newest_map(); + } void insert_osdmap_epochs(epoch_t first, epoch_t last) { ceph_assert(std::cmp_less_equal(first, last)); interval_set message_epochs; message_epochs.insert(first, last - first + 1); + std::lock_guard lock(map_lock); maps.union_of(message_epochs); - ceph_assert(last == get_newest_map()); + ceph_assert(last == calc_newest_map()); + } + + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator &bl); + +}; + WRITE_CLASS_ENCODER(GuardedMap); + GuardedMap mapc; +public: + uuid_d cluster_fsid, osd_fsid; + int32_t whoami = -1; // my role in this fs. + epoch_t current_epoch = 0; // most recent epoch + + bool is_maps_empty() const { + return mapc.is_maps_empty(); + } + + void erase_oldest_maps() { + mapc.erase_oldest_maps(); + } + + interval_set::size_type get_maps_num_intervals() const { + return mapc.get_maps_num_intervals(); + } + + interval_set get_maps() const { + return mapc.get_maps(); + } + + epoch_t get_oldest_map() const { + return mapc.get_oldest_map(); + } + + epoch_t get_newest_map() const { + return mapc.get_newest_map(); + } + + void insert_osdmap_epochs(epoch_t first, epoch_t last) { + mapc.insert_osdmap_epochs(first, last); } double weight = 0.0; @@ -5708,6 +5809,13 @@ public: void decode(ceph::buffer::list::const_iterator &bl); void dump(ceph::Formatter *f) const; static void generate_test_instances(std::list& o); + + // Allow default operators to avoid crimson related errors + OSDSuperblock(OSDSuperblock&&) noexcept = default; + OSDSuperblock& operator=(OSDSuperblock&&) noexcept = default; + OSDSuperblock(const OSDSuperblock&) = default; + OSDSuperblock& operator=(const OSDSuperblock&) = default; + OSDSuperblock() = default; }; WRITE_CLASS_ENCODER(OSDSuperblock) @@ -5717,7 +5825,7 @@ inline std::ostream& operator<<(std::ostream& out, const OSDSuperblock& sb) << " osd." << sb.whoami << " " << sb.osd_fsid << " e" << sb.current_epoch - << " maps " << sb.maps + << " maps " << sb.get_maps() << " lci=[" << sb.mounted << "," << sb.clean_thru << "]" << " tlb=" << sb.cluster_osdmap_trim_lower_bound << ")"; -- 2.39.5