From: Mohit Agrawal Date: Tue, 22 Apr 2025 15:12:01 +0000 (+0530) Subject: osd: Access/Modify epoch maps under mutex in OSDSuperblock class X-Git-Tag: v20.1.1~136^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d62dd59e0d87db59be474444b383db4a65b736a6;p=ceph.git osd: Access/Modify epoch maps under mutex in OSDSuperblock class The OSDSuperblock object access/modify epoch maps in multiple threads simultaneously due to that OSD is getting crashed. To avoid the crash access the maps under mutex. Fixes: https://tracker.ceph.com/issues/66819 Signed-off-by: Mohit Agrawal (cherry picked from commit 8cf896e382e5416dad15592de1cef23316f23fb8) --- diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index a90f00291591..674dc211a077 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -870,7 +870,7 @@ void OSD::dump_status(Formatter* f) const f->dump_stream("osd_fsid") << superblock.osd_fsid; f->dump_unsigned("whoami", superblock.whoami); f->dump_string("state", pg_shard_manager.get_osd_state_string()); - f->dump_stream("maps") << superblock.maps; + f->dump_stream("maps") << superblock.get_maps(); f->dump_stream("oldest_map") << superblock.get_oldest_map(); f->dump_stream("newest_map") << superblock.get_newest_map(); f->dump_unsigned("cluster_osdmap_trim_lower_bound", @@ -881,7 +881,7 @@ void OSD::dump_status(Formatter* f) const void OSD::print(std::ostream& out) const { out << "{osd." << superblock.whoami << " " - << superblock.osd_fsid << " maps " << superblock.maps + << superblock.osd_fsid << " maps " << superblock.get_maps() << " tlb:" << superblock.cluster_osdmap_trim_lower_bound << " pgs:" << pg_shard_manager.get_num_pgs() << "}"; @@ -1197,7 +1197,7 @@ seastar::future<> OSD::_handle_osd_map(Ref m) // even if this map isn't from a mon, we may have satisfied our subscription monc->sub_got("osdmap", last); - if (!superblock.maps.empty()) { + if (!superblock.is_maps_empty()) { pg_shard_manager.trim_maps(t, superblock); // TODO: once we support pg splitting, update pg_num_history here //pg_num_history.prune(superblock.get_oldest_map()); diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc index 4ecb0b7aecb5..3cb39dd11d65 100644 --- a/src/crimson/osd/shard_services.cc +++ b/src/crimson/osd/shard_services.cc @@ -549,7 +549,7 @@ void OSDSingletonState::trim_maps(ceph::os::Transaction& t, DEBUG("removing old osdmap epoch {}", superblock.get_oldest_map()); meta_coll->remove_map(t, superblock.get_oldest_map()); meta_coll->remove_inc_map(t, superblock.get_oldest_map()); - superblock.maps.erase(superblock.get_oldest_map()); + superblock.erase_oldest_maps(); } // we should not trim past osdmaps.cached_key_lower_bound() diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 23b028fc8fd9..2b4d01c04335 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2793,7 +2793,7 @@ void OSD::asok_command( f->dump_stream("osd_fsid") << superblock.osd_fsid; f->dump_unsigned("whoami", superblock.whoami); f->dump_string("state", get_state_name(get_state())); - f->dump_stream("maps") << superblock.maps; + f->dump_stream("maps") << superblock.get_maps(); f->dump_stream("oldest_map") << superblock.get_oldest_map(); f->dump_stream("newest_map") << superblock.get_newest_map(); f->dump_unsigned("cluster_osdmap_trim_lower_bound", @@ -6912,7 +6912,7 @@ void OSD::start_boot() } dout(1) << __func__ << dendl; set_state(STATE_PREBOOT); - dout(10) << "start_boot - have maps " << superblock.maps << dendl; + dout(10) << "start_boot - have maps " << superblock.get_maps() << dendl; monc->get_version("osdmap", CB_OSD_GetVersion(this)); } @@ -8110,7 +8110,7 @@ void OSD::trim_maps(epoch_t oldest) dout(20) << " removing old osdmap epoch " << superblock.get_oldest_map() << dendl; t.remove(coll_t::meta(), get_osdmap_pobject_name(superblock.get_oldest_map())); t.remove(coll_t::meta(), get_inc_osdmap_pobject_name(superblock.get_oldest_map())); - superblock.maps.erase(superblock.get_oldest_map()); + superblock.erase_oldest_maps(); } service.publish_superblock(superblock); @@ -8411,16 +8411,16 @@ void OSD::handle_osd_map(MOSDMap *m) track_pools_and_pg_num_changes(added_maps, t); - if (!superblock.maps.empty()) { + if (!superblock.is_maps_empty()) { trim_maps(m->cluster_osdmap_trim_lower_bound); pg_num_history.prune(superblock.get_oldest_map()); } superblock.insert_osdmap_epochs(first, last); - if (superblock.maps.num_intervals() > 1) { + if (superblock.get_maps_num_intervals() > 1) { // we had a map gap and not yet trimmed all the way up to // cluster_osdmap_trim_lower_bound dout(10) << __func__ << " osd maps are not contiguous" - << superblock.maps << dendl; + << superblock.get_maps() << dendl; } superblock.current_epoch = last; @@ -8490,7 +8490,7 @@ void OSD::track_pools_and_pg_num_changes( // lastmap should be the newest_map we have. OSDMapRef lastmap; - if (superblock.maps.empty()) { + if (superblock.is_maps_empty()) { dout(10) << __func__ << " no maps stored, this is probably " << "the first start of this osd" << dendl; lastmap = added_maps.at(first); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index daaa929b9aab..0c47e84dc6c2 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -5822,6 +5822,19 @@ void pg_hit_set_history_t::generate_test_instances(list& ls.back()->history.push_back(pg_hit_set_info_t()); } +// -- GuardedMap -- +void OSDSuperblock::GuardedMap::encode(ceph::buffer::list &bl) const +{ + std::lock_guard lock(map_lock); + ::encode(maps, bl); +} + +void OSDSuperblock::GuardedMap::decode(ceph::buffer::list::const_iterator &bl) +{ + std::lock_guard lock(map_lock); + ::decode(maps, bl); +} + // -- OSDSuperblock -- void OSDSuperblock::encode(ceph::buffer::list &bl) const @@ -5842,7 +5855,7 @@ void OSDSuperblock::encode(ceph::buffer::list &bl) const encode(purged_snaps_last, bl); encode(last_purged_snaps_scrub, bl); encode(cluster_osdmap_trim_lower_bound, bl); - encode(maps, bl); + mapc.encode(bl); ENCODE_FINISH(bl); } @@ -5889,7 +5902,7 @@ void OSDSuperblock::decode(ceph::buffer::list::const_iterator &bl) cluster_osdmap_trim_lower_bound = 0; } if (struct_v >= 11) { - decode(maps, bl); + mapc.decode(bl); } else { insert_osdmap_epochs(oldest_map, newest_map); } @@ -5912,7 +5925,7 @@ void OSDSuperblock::dump(Formatter *f) const f->dump_stream("last_purged_snaps_scrub") << last_purged_snaps_scrub; f->dump_int("cluster_osdmap_trim_lower_bound", cluster_osdmap_trim_lower_bound); - f->dump_stream("maps") << maps; + f->dump_stream("maps") << get_maps(); } void OSDSuperblock::generate_test_instances(list& o) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 492cc96c6019..5a58dddaf80e 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -49,6 +49,7 @@ #include "common/Formatter.h" #include "common/hobject.h" #include "common/snap_types.h" +#include "common/ceph_mutex.h" #include "common/strtol.h" // for ritoa() #include "HitSet.h" #include "librados/ListObjectImpl.h" @@ -5673,33 +5674,133 @@ inline std::ostream& operator<<(std::ostream& out, const ObjectExtent &ex) // --------------------------------------- class OSDSuperblock { -public: - uuid_d cluster_fsid, osd_fsid; - int32_t whoami = -1; // my role in this fs. - epoch_t current_epoch = 0; // most recent epoch - interval_set maps; // oldest/newest maps we have. +private: + class GuardedMap { +private: + mutable ceph::mutex map_lock = ceph::make_mutex("map_lock"); + interval_set maps; - epoch_t get_oldest_map() const { + epoch_t calc_newest_map() const { if (!maps.empty()) { - return maps.range_start(); + // maps stores [oldest_map, newest_map) (exclusive) + return maps.range_end() - 1; } return 0; } - - epoch_t get_newest_map() const { + epoch_t calc_oldest_map() const { if (!maps.empty()) { - // maps stores [oldest_map, newest_map) (exclusive) - return maps.range_end() - 1; + return maps.range_start(); } return 0; } +public: + GuardedMap() = default; + GuardedMap(const GuardedMap& other) + : map_lock(ceph::make_mutex("map_lock")) + { + std::lock_guard l(other.map_lock); // Lock before copying shared state + maps = other.maps; + } + + GuardedMap& operator=(const GuardedMap& other) { + if (this != &other) { + std::scoped_lock l(map_lock, other.map_lock); + maps = other.maps; + } + return *this; + } + + GuardedMap(GuardedMap&& other) + :map_lock(ceph::make_mutex("map_lock")) + { + std::lock_guard l(other.map_lock); + maps = std::move(other.maps); + } + + GuardedMap& operator=(GuardedMap&& other) noexcept { + if (this != &other) { + std::scoped_lock l(map_lock, other.map_lock); + maps = std::move(other.maps); + } + return *this; + } + + bool is_maps_empty() const { + std::lock_guard lock(map_lock); + return maps.empty(); + } + + void erase_oldest_maps() { + std::lock_guard lock(map_lock); + maps.erase(calc_oldest_map()); + } + + interval_set::size_type get_maps_num_intervals() const { + std::lock_guard lock(map_lock); + return maps.num_intervals(); + } + + interval_set get_maps() const { + std::lock_guard lock(map_lock); + return maps; + } + + epoch_t get_oldest_map() const { + std::lock_guard lock(map_lock); + return calc_oldest_map(); + } + + epoch_t get_newest_map() const { + std::lock_guard lock(map_lock); + return calc_newest_map(); + } void insert_osdmap_epochs(epoch_t first, epoch_t last) { ceph_assert(std::cmp_less_equal(first, last)); interval_set message_epochs; message_epochs.insert(first, last - first + 1); + std::lock_guard lock(map_lock); maps.union_of(message_epochs); - ceph_assert(last == get_newest_map()); + ceph_assert(last == calc_newest_map()); + } + + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator &bl); + +}; + WRITE_CLASS_ENCODER(GuardedMap); + GuardedMap mapc; +public: + uuid_d cluster_fsid, osd_fsid; + int32_t whoami = -1; // my role in this fs. + epoch_t current_epoch = 0; // most recent epoch + + bool is_maps_empty() const { + return mapc.is_maps_empty(); + } + + void erase_oldest_maps() { + mapc.erase_oldest_maps(); + } + + interval_set::size_type get_maps_num_intervals() const { + return mapc.get_maps_num_intervals(); + } + + interval_set get_maps() const { + return mapc.get_maps(); + } + + epoch_t get_oldest_map() const { + return mapc.get_oldest_map(); + } + + epoch_t get_newest_map() const { + return mapc.get_newest_map(); + } + + void insert_osdmap_epochs(epoch_t first, epoch_t last) { + mapc.insert_osdmap_epochs(first, last); } double weight = 0.0; @@ -5719,6 +5820,13 @@ public: void decode(ceph::buffer::list::const_iterator &bl); void dump(ceph::Formatter *f) const; static void generate_test_instances(std::list& o); + + // Allow default operators to avoid crimson related errors + OSDSuperblock(OSDSuperblock&&) noexcept = default; + OSDSuperblock& operator=(OSDSuperblock&&) noexcept = default; + OSDSuperblock(const OSDSuperblock&) = default; + OSDSuperblock& operator=(const OSDSuperblock&) = default; + OSDSuperblock() = default; }; WRITE_CLASS_ENCODER(OSDSuperblock) @@ -5728,7 +5836,7 @@ inline std::ostream& operator<<(std::ostream& out, const OSDSuperblock& sb) << " osd." << sb.whoami << " " << sb.osd_fsid << " e" << sb.current_epoch - << " maps " << sb.maps + << " maps " << sb.get_maps() << " lci=[" << sb.mounted << "," << sb.clean_thru << "]" << " tlb=" << sb.cluster_osdmap_trim_lower_bound << ")";