From: Leonid Chernin Date: Thu, 4 Dec 2025 05:04:17 +0000 (+0200) Subject: add structures for failback-in-progress to location X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=eb765acb04b92a829e8d17f9351a30eed5b1f1fc;p=ceph-ci.git add structures for failback-in-progress to location Signed-off-by: Leonid Chernin --- diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 94f56e85b1e..4c41ce24b2b 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -374,26 +374,40 @@ int NVMeofGwMap::cfg_start_inter_location_failback( const NvmeGroupKey& group_key, std::string &location, bool &propose_pending) { auto& gws_states = created_gws[group_key]; + bool accept = false; // for all the gateways of the subsystem - for (auto& found_gw_state: gws_states) { - auto st = found_gw_state.second; - if (st.location == location) { - if(st.availability != gw_availability_t::GW_AVAILABLE || - st.sm_state[st.ana_grp_id] != gw_states_per_group_t::GW_STANDBY_STATE) { - dout(4) << "command rejected found gw in state " << st.availability - << " ana grp state " << st.sm_state[st.ana_grp_id] << dendl; - return -EINVAL; - } - } + if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + dout(4) << "Command is not allowed - feature is not installed" + << group_key << dendl; + return -EINVAL; + } + if (failbacks_in_progress.find(group_key) != failbacks_in_progress.end()) + { + dout(4) << "command cannot be accepted since found active failback for a group " + << failbacks_in_progress[group_key] << dendl; + return -EEXIST; } for (auto& found_gw_state: gws_states) { auto st = found_gw_state.second; if (st.location == location) { - auto gw_id = found_gw_state.first; - find_failback_gw(gw_id, group_key, propose_pending, true); + if(st.availability == gw_availability_t::GW_AVAILABLE && + st.sm_state[st.ana_grp_id] == gw_states_per_group_t::GW_STANDBY_STATE) { + dout(10) << "command accepted found gw in state " << st.availability + << " ana grp state " << st.sm_state[st.ana_grp_id] << dendl; + accept = true; + break; + } } } - return 0; + if (accept) { + failbacks_in_progress[group_key] = location; + propose_pending = true; + return 0; + } else { + dout(10) << "command not accepted: not found AVAILABLE GW" + "with ANA grp in standby state" << dendl; + return -EINVAL; + } } void NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id, @@ -676,6 +690,7 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose) find_failback_gw(gw_id, group_key, propose); } } + check_relocate_ana_groups(group_key, propose); if (propose) { validate_gw_map(group_key); increment_gw_epoch(group_key); @@ -683,6 +698,123 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose) } } +void NVMeofGwMap::check_relocate_ana_groups(const NvmeGroupKey& group_key, + bool &propose) { + /* if location exists in failbacks_in_progress find all gws in location. + * add ana-grp of not Available gws to the list. + * if ana-grp is already active on some gw in location skip it + * for ana-grp in list make relocation. + * if all ana-grps in location active remove location from the map failbacks_in_progress + */ + FailbackLocation location = ""; + std::list reloc_list; + auto& gws_states = created_gws[group_key]; + if (failbacks_in_progress.find(group_key) != failbacks_in_progress.end()) { + location = failbacks_in_progress[group_key]; + uint32_t num_gw_in_location = 0; + uint32_t num_active_ana_in_location = 0; + for (auto& gw_state : gws_states) { // loop for GWs inside group-key + NvmeGwMonState& state = gw_state.second; + if (state.location == location) { + num_gw_in_location ++; + if (state.availability != gw_availability_t::GW_AVAILABLE) { + reloc_list.push_back(state.ana_grp_id); + } else { // in parallel check condition to complete failback-in-process + for (auto& state_it: state.sm_state) { + if (state_it.second == gw_states_per_group_t::GW_ACTIVE_STATE) { + num_active_ana_in_location ++; + } + } + } + } + } + if (num_gw_in_location == num_active_ana_in_location) { + failbacks_in_progress.erase(group_key); // All ana groups of location are in Active + dout(4) << "the location entry is erased "<< location + << " num_ana_groups in location " << num_gw_in_location + << " from the failbacks-in-progress of group " << group_key < gw_epoch; + /* in stretched cluster configuration + * failbacks between locations not happens automatically + * */ + std::map failbacks_in_progress; void to_gmap(std::map& Gmap) const; void track_deleting_gws(const NvmeGroupKey& group_key, @@ -135,9 +139,8 @@ private: void find_failover_candidate( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, bool &propose_pending); - void find_failback_gw( - const NvmeGwId &gw_id, const NvmeGroupKey& group_key, - bool &propose_pending, bool force_inter_location = false); + void find_failback_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, + bool &propose_pending); void set_failover_gw_for_ANA_group( const NvmeGwId &failed_gw_id, const NvmeGroupKey& group_key, const NvmeGwId &gw_id, NvmeAnaGrpId groupid); @@ -158,6 +161,11 @@ private: int find_failover_gw_logic(NvmeGwMonStates& gws_states, NvmeLocation& location, NvmeGwId& min_loaded_gw_id); bool validate_number_locations(int num_gws, int num_locations); + void check_relocate_ana_groups(const NvmeGroupKey& group_key, + bool &propose); + int relocate_ana_grp(const NvmeGwId &src_gw_id, + const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, + NvmeLocation& location, bool &propose); public: int blocklist_gw( @@ -168,7 +176,10 @@ public: using ceph::encode; uint8_t version = 1; if (HAVE_FEATURE(features, NVMEOFHAMAP)) { - version = 2; + version = 2; + } + if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { + version = 3; } ENCODE_START(version, version, bl); encode(epoch, bl);// global map epoch @@ -178,12 +189,15 @@ public: if (version >= 2) { encode(gw_epoch, bl); } + if (version >=3) { + encode(failbacks_in_progress, bl); + } ENCODE_FINISH(bl); } void decode(ceph::buffer::list::const_iterator &bl) { using ceph::decode; - DECODE_START(2, bl); + DECODE_START(3, bl); decode(epoch, bl); decode(created_gws, bl); @@ -191,6 +205,9 @@ public: if (struct_v >= 2) { decode(gw_epoch, bl); } + if (struct_v >=3) { + decode(failbacks_in_progress, bl); + } DECODE_FINISH(bl); } diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 361f4f4120e..cd54637efbe 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -667,6 +667,19 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) <<" location "<< location << dendl; rc = pending_map.cfg_start_inter_location_failback(group_key, location, propose); + if (rc == -EINVAL || rc == -EEXIST) { + err = rc; + sstrm.str(""); + if (rc == -EEXIST) { + sstrm.str("command already set please wait until completed"); + } + if (rc == EINVAL) { + sstrm.str("command cannot be executed"); + } + } + if (rc == 0 && propose == true) { + response = true; + } } getline(sstrm, rs); if (response == false) { diff --git a/src/mon/NVMeofGwSerialize.h b/src/mon/NVMeofGwSerialize.h index b75bebc3107..17b816dd14e 100755 --- a/src/mon/NVMeofGwSerialize.h +++ b/src/mon/NVMeofGwSerialize.h @@ -190,7 +190,7 @@ inline std::ostream& print_gw_created_t( os << " " << state_itr.first <<": " << state_itr.second << ","; } os << "]\n"<< MODULE_PREFFIX << " entity-addr : " << value.addr_vect - << " availability " << value.availability + << " availability " << value.availability << "location " << value.location << " full-startup " << value.performed_full_startup << " ]"; return os; @@ -237,6 +237,10 @@ inline std::ostream& operator<<(std::ostream& os, const NVMeofGwMap value) { os << "\n" << MODULE_PREFFIX << "{ " << group_gws.first << " } -> GW epoch: " << group_gws.second << " }"; } + for (auto& group_gws: value.failbacks_in_progress) { + os << "\n" << MODULE_PREFFIX << "{ " << group_gws.first + << " } -> failback-to: " << group_gws.second << " }"; + } for (auto& group_gws: value.created_gws) { os << "\n" << MODULE_PREFFIX << "{ " << group_gws.first << " } -> { " << group_gws.second << " }"; @@ -681,6 +685,39 @@ inline void decode(std::map& gw_epoch, DECODE_FINISH(bl); } +inline void encode( + const std::map &failbacks_in_progress, + ceph::bufferlist &bl) { + ENCODE_START(1, 1, bl); + encode ((uint32_t)failbacks_in_progress.size(), bl); // number of groups + for (auto& group_failbacks: failbacks_in_progress) { + auto& group_key = group_failbacks.first; + encode(group_key.first, bl); // pool + encode(group_key.second, bl); // group + encode(group_failbacks.second, bl); + } + ENCODE_FINISH(bl); +} + +inline void decode( + std::map &failbacks_in_progress, + ceph::buffer::list::const_iterator &bl) { + failbacks_in_progress.clear(); + uint32_t ngroups; + DECODE_START(1, bl); + decode(ngroups, bl); + for(uint32_t i = 0; i& created_gws, ceph::bufferlist &bl, uint64_t features) { diff --git a/src/mon/NVMeofGwTypes.h b/src/mon/NVMeofGwTypes.h index 91c6b4834b3..940107f2006 100755 --- a/src/mon/NVMeofGwTypes.h +++ b/src/mon/NVMeofGwTypes.h @@ -26,6 +26,7 @@ #include "msg/msg_types.h" using NvmeGwId = std::string; +using FailbackLocation = std::string; using NvmeLocation = std::string; using NvmeGroupKey = std::pair; using NvmeNqnId = std::string;