From 01d0d69d37fb881823c3debe7552a785dcb2c399 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Thu, 23 Oct 2025 08:48:24 +0300 Subject: [PATCH] nvmeofgw: added support to nvmeof stretched cluster: nope GW commands added : set location and set admin state enable/disable added start-failback command. failover logic is impacted by GW location implement GW admin commands enable/disable added map for location-failback-in-progress failback between locations happens only by monitor command implemented new ana-group relocation process used when inter-location failback command sent added upgrade rules fixes: https://tracker.ceph.com/issues/74210 Signed-off-by: Leonid Chernin --- src/mon/MonCommands.h | 29 +++ src/mon/NVMeofGwMap.cc | 419 ++++++++++++++++++++++++++++++------ src/mon/NVMeofGwMap.h | 31 ++- src/mon/NVMeofGwMon.cc | 82 ++++++- src/mon/NVMeofGwSerialize.h | 61 +++++- src/mon/NVMeofGwTypes.h | 9 + 6 files changed, 564 insertions(+), 67 deletions(-) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index ca9907c51e6..484603e3d5e 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1459,6 +1459,35 @@ COMMAND("nvme-gw listeners" " show all nvmeof gateways listeners within (pool, group)", "mon", "r") +COMMAND("nvme-gw enable" + " name=id,type=CephString" + " name=pool,type=CephString" + " name=group,type=CephString", + "administratively enables nvmeof gateway id for (pool, group)", + "mgr", "rw") + +COMMAND("nvme-gw disable" + " name=id,type=CephString" + " name=pool,type=CephString" + " name=group,type=CephString", + "administratively disables nvmeof gateway id for (pool, group)", + "mgr", "rw") + +COMMAND("nvme-gw set-location" + " name=id,type=CephString" + " name=pool,type=CephString" + " name=group,type=CephString" + " name=location,type=CephString", + "set location for nvmeof gateway id for (pool, group)", + "mgr", "rw") + +COMMAND("nvme-gw start-failback" + " name=pool,type=CephString" + " name=group,type=CephString" + " name=location,type=CephString", + "start failbacks for recovered location within (pool, group)", + "mgr", "rw") + // these are tell commands that were implemented as CLI commands in // the broken pre-octopus way that we want to allow to work when a // monitor has upgraded to octopus+ but the monmap min_mon_release is diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 5d1a42089e4..03ca657e126 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -259,6 +259,141 @@ int NVMeofGwMap::do_delete_gw( return -EINVAL; } +int NVMeofGwMap::cfg_admin_state_change(const NvmeGwId &gw_id, + const NvmeGroupKey& group_key, + gw_admin_state_t state, bool &propose_pending, bool test) +{ + auto& gws_states = created_gws[group_key]; + auto gw_state = gws_states.find(gw_id); + if (gw_state != gws_states.end()) { + auto& st = gw_state->second; + if (state == gw_admin_state_t::GW_ADMIN_DISABLED) { + if (st.gw_admin_state == gw_admin_state_t::GW_ADMIN_ENABLED) { + dout(4) << "GW-id set admin Disabled " << group_key + << " " << gw_id << dendl; + if (st.availability == gw_availability_t::GW_AVAILABLE) { + skip_failovers_for_group(group_key, 5); + process_gw_map_gw_down(gw_id, group_key, propose_pending); + } + propose_pending = true; + } + } else if (state == gw_admin_state_t::GW_ADMIN_ENABLED) { + if (st.gw_admin_state == gw_admin_state_t::GW_ADMIN_DISABLED) { + dout(4) << "GW-id set admin Enabled " << group_key + << " " << gw_id << dendl; + propose_pending = true; + } + } + st.gw_admin_state = state; + } else { + dout(4) << "GW-id not created yet " << group_key << " " << gw_id << dendl; + return -EINVAL; + } + return 0; +} + +bool NVMeofGwMap::validate_number_locations(int num_gws, int num_locations) +{ + return true; // This function may have not empty body in the nex commit +} + +int NVMeofGwMap::cfg_set_location(const NvmeGwId &gw_id, + const NvmeGroupKey& group_key, + std::string &location, bool &propose_pending, bool test) { + // validate that location differs from gw location + auto& gws_states = created_gws[group_key]; + auto gw_state = gws_states.find(gw_id); + std::set locations; + if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + dout(4) << "Command is not allowed - feature is not installed" + << group_key << " " << gw_id << dendl; + return -EINVAL; + } + int num_gws = gws_states.size(); + if (gw_state != gws_states.end()) { + auto& st = gw_state->second; + if (st.location == location) { + dout(4) << "GW-id same location is set " << group_key + << " " << gw_id << " " << location << dendl; + return 0; + } else { + bool last_gw = true; + for (auto& states: created_gws[group_key]) { + auto &state = states.second; + // calculate number set locations + locations.insert(state.location); + if (state.location == st.location && states.first != gw_id) { + last_gw = false; + break; + } + } + if (last_gw) { // this location would be removed so erase from set + locations.erase(st.location); + } + locations.insert(location); + dout(10) << "num GWs " << num_gws << " num set locations " + << locations.size() << dendl; + bool rc = validate_number_locations(num_gws, locations.size()); + if (rc ==false) { + dout(4) << "Try to define invalid number of locations " + << locations.size() << dendl; + return -EINVAL; + } + if (last_gw) { + dout(4) << "remove location:last gw-id " << gw_id << " location " + << st.location << dendl; + } + st.location = location; + dout(10) << "GW-id location is set " << group_key + << " " << gw_id << " " << location << dendl; + propose_pending = true; + return 0; + } + } else { + dout(4) << "GW-id not created yet " << group_key << " " << gw_id << dendl; + return -EINVAL; + } +} + +int NVMeofGwMap::cfg_start_inter_location_failback( + const NvmeGroupKey& group_key, + std::string &location, bool &propose_pending) { + auto& gws_states = created_gws[group_key]; + bool accept = false; + // for all the gateways of the subsystem + if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + dout(4) << "Command is not allowed - feature is not installed" + << group_key << dendl; + return -EINVAL; + } + if (failbacks_in_progress.find(group_key) != failbacks_in_progress.end()) { + dout(4) << "command cannot be accepted since found active failback for a group " + << failbacks_in_progress[group_key] << dendl; + return -EEXIST; + } + for (auto& found_gw_state: gws_states) { + auto st = found_gw_state.second; + if (st.location == location) { + if(st.availability == gw_availability_t::GW_AVAILABLE && + st.sm_state[st.ana_grp_id] == gw_states_per_group_t::GW_STANDBY_STATE) { + dout(10) << "command accepted found gw in state " << st.availability + << " ana grp state " << st.sm_state[st.ana_grp_id] << dendl; + accept = true; + break; + } + } + } + if (accept) { + failbacks_in_progress[group_key] = location; + propose_pending = true; + return 0; + } else { + dout(10) << "command not accepted: not found AVAILABLE GW" + "with ANA grp in standby state" << dendl; + return -EINVAL; + } +} + void NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending) { @@ -520,6 +655,7 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose) find_failback_gw(gw_id, group_key, propose); } } + check_relocate_ana_groups(group_key, propose); if (propose) { validate_gw_map(group_key); increment_gw_epoch(group_key); @@ -527,6 +663,123 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose) } } +void NVMeofGwMap::check_relocate_ana_groups(const NvmeGroupKey& group_key, + bool &propose) { + /* if location exists in failbacks_in_progress find all gws in location. + * add ana-grp of not Available gws to the list. + * if ana-grp is already active on some gw in location skip it + * for ana-grp in list make relocation. + * if all ana-grps in location active remove location from the map failbacks_in_progress + */ + FailbackLocation location = ""; + std::list reloc_list; + auto& gws_states = created_gws[group_key]; + if (failbacks_in_progress.find(group_key) != failbacks_in_progress.end()) { + location = failbacks_in_progress[group_key]; + uint32_t num_gw_in_location = 0; + uint32_t num_active_ana_in_location = 0; + for (auto& gw_state : gws_states) { // loop for GWs inside group-key + NvmeGwMonState& state = gw_state.second; + if (state.location == location) { + num_gw_in_location ++; + if (state.availability != gw_availability_t::GW_AVAILABLE) { + reloc_list.push_back(state.ana_grp_id); + } else { // in parallel check condition to complete failback-in-process + for (auto& state_it: state.sm_state) { + if (state_it.second == gw_states_per_group_t::GW_ACTIVE_STATE) { + num_active_ana_in_location ++; + } + } + } + } + } + if (num_gw_in_location == num_active_ana_in_location) { + failbacks_in_progress.erase(group_key); // All ana groups of location are in Active + dout(4) << "the location entry is erased "<< location + << " num_ana_groups in location " << num_gw_in_location + << " from the failbacks-in-progress of group " << group_key < current_ana_groups_in_gw) { + min_num_ana_groups_in_gw = current_ana_groups_in_gw; + min_loaded_gw_id = found_gw_state.first; + dout(10) << "choose: gw-id min_ana_groups " << min_loaded_gw_id + << current_ana_groups_in_gw << " min " + << min_num_ana_groups_in_gw << dendl; + } + } + } + if (min_loaded_gw_id !=ILLEGAL_GW_ID) { // some GW choosen + return 0; + } else if (num_busy) { + dout(4) << "some GWs are busy " << num_busy + << "num Available " << num_gws << dendl; + return -EBUSY; + } else { + dout(4) << "no GWs in Active state. num Available " << num_gws << dendl; + return -ENOENT; + } +} + void NVMeofGwMap::find_failover_candidate( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, bool &propose_pending) { dout(10) << __func__<< " " << gw_id << dendl; -#define ILLEGAL_GW_ID " " -#define MIN_NUM_ANA_GROUPS 0xFFF - int min_num_ana_groups_in_gw = 0; - int current_ana_groups_in_gw = 0; + NvmeLocation ana_location = ""; std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); NvmeGwId min_loaded_gw_id = ILLEGAL_GW_ID; auto& gws_states = created_gws[group_key]; @@ -648,40 +972,19 @@ void NVMeofGwMap::find_failover_candidate( gw_state->second.standby_state(grpid); return ; } + if (st.ana_grp_id == grpid) { + ana_location = st.location; // found original location of the ANA group + dout(10) << "Found location " << ana_location + << " for anagrp " << grpid << dendl; + } } // Find a GW that takes over the ANA group(s) - min_num_ana_groups_in_gw = MIN_NUM_ANA_GROUPS; - min_loaded_gw_id = ILLEGAL_GW_ID; - - // for all the gateways of the subsystem - for (auto& found_gw_state: gws_states) { - auto st = found_gw_state.second; - if (st.availability == gw_availability_t::GW_AVAILABLE) { - current_ana_groups_in_gw = 0; - for (auto& state_itr: created_gws[group_key][gw_id].sm_state) { - NvmeAnaGrpId anagrp = state_itr.first; - if ((st.sm_state[anagrp] == - gw_states_per_group_t::GW_OWNER_WAIT_FAILBACK_PREPARED) || - (st.sm_state[anagrp] == - gw_states_per_group_t::GW_WAIT_FAILBACK_PREPARED) || - (st.sm_state[anagrp] == - gw_states_per_group_t::GW_WAIT_BLOCKLIST_CMPL)) { - current_ana_groups_in_gw = 0xFFFF; - break; // dont take into account GWs in the transitive state - } else if (st.sm_state[anagrp] == - gw_states_per_group_t::GW_ACTIVE_STATE) { - // how many ANA groups are handled by this GW - current_ana_groups_in_gw++; - } - } - if (min_num_ana_groups_in_gw > current_ana_groups_in_gw) { - min_num_ana_groups_in_gw = current_ana_groups_in_gw; - min_loaded_gw_id = found_gw_state.first; - dout(10) << "choose: gw-id min_ana_groups " << min_loaded_gw_id - << current_ana_groups_in_gw << " min " - << min_num_ana_groups_in_gw << dendl; - } - } + //Find GW among the GWs belong to the same location + int rc = find_failover_gw_logic(gws_states, ana_location, min_loaded_gw_id); + if (rc == -ENOENT) { + ana_location = ""; // looks at all GWs + dout(10) << "Find Failover GW -look at all Gateways in the pool/group" << dendl; + rc = find_failover_gw_logic(gws_states, ana_location, min_loaded_gw_id); } if (min_loaded_gw_id != ILLEGAL_GW_ID) { propose_pending = true; @@ -910,7 +1213,8 @@ void NVMeofGwMap::fsm_handle_to_expired( for (auto& gw_state: created_gws[group_key]) { auto& st = gw_state.second; // group owner - if (st.ana_grp_id == grpid) { + if (st.sm_state[grpid] == + gw_states_per_group_t::GW_OWNER_WAIT_FAILBACK_PREPARED) { grp_owner_found = true; if (st.availability == gw_availability_t::GW_AVAILABLE) { if (!(fbp_gw_state.last_gw_map_epoch_valid && @@ -924,34 +1228,29 @@ void NVMeofGwMap::fsm_handle_to_expired( } cancel_timer(gw_id, group_key, grpid); map_modified = true; - if ((st.sm_state[grpid] == - gw_states_per_group_t::GW_OWNER_WAIT_FAILBACK_PREPARED) && - (st.availability == gw_availability_t::GW_AVAILABLE)) { - // Previous failover GW set to standby - fbp_gw_state.standby_state(grpid); + // unconditionaly previous failover GW set to standby + fbp_gw_state.standby_state(grpid); + if (st.availability == gw_availability_t::GW_AVAILABLE) { st.active_state(grpid); dout(10) << "Expired Failback-preparation timer from GW " - << gw_id << " ANA groupId "<< grpid << dendl; - map_modified = true; + << gw_id << " ANA groupId "<< grpid << dendl; break; - } else if ((st.sm_state[grpid] == - gw_states_per_group_t::GW_STANDBY_STATE) && - (st.availability == gw_availability_t::GW_AVAILABLE)) { - // GW failed during the persistency interval - st.standby_state(grpid); - dout(10) << "Failback unsuccessfull. GW: " << gw_state.first - << " becomes Standby for the ANA groupId " << grpid << dendl; } - fbp_gw_state.standby_state(grpid); - dout(10) << "Failback unsuccessfull GW: " << gw_id - << " becomes Standby for the ANA groupId " << grpid << dendl; - map_modified = true; - break; + else { + st.standby_state(grpid); + dout(10) << "GW failed durind failback/relocation persistency interval" + << gw_state.first << dendl; + } } } if (grp_owner_found == false) { - // when GW group owner is deleted the fbk gw is put to standby - dout(4) << "group owner not found " << grpid << " GW: " << gw_id << dendl; + cancel_timer(gw_id, group_key, grpid); + fbp_gw_state.standby_state(grpid); + dout(4) << "group owner/relocation target not found " + << grpid << " GW: " << gw_id << dendl; + dout(10) << "Failback unsuccessfull GW: " << gw_id + << " becomes Standby for the ANA groupId " << grpid << dendl; + map_modified = true; } } else if (fbp_gw_state.sm_state[grpid] == gw_states_per_group_t::GW_WAIT_BLOCKLIST_CMPL) { diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index bfc45a00985..2cc11150a3a 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -61,6 +61,10 @@ public: * updating the map with a change affecting gws in group_key. */ std::map gw_epoch; + /* in stretched cluster configuration + * failbacks between locations does not happen automatically + * */ + std::map failbacks_in_progress; void to_gmap(std::map& Gmap) const; void track_deleting_gws(const NvmeGroupKey& group_key, @@ -70,6 +74,12 @@ public: int cfg_add_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, uint64_t features); int cfg_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key); + int cfg_admin_state_change(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, + gw_admin_state_t state, bool &propose_pending, bool test = false); + int cfg_set_location(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, + std::string &location, bool &propose_pending, bool test = false); + int cfg_start_inter_location_failback(const NvmeGroupKey& group_key, + std::string &location, bool &propose_pending); void process_gw_map_ka( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, epoch_t& last_osd_epoch, bool &propose_pending); @@ -146,6 +156,14 @@ private: void validate_gw_map( const NvmeGroupKey& group_key); void increment_gw_epoch(const NvmeGroupKey& group_key); + int find_failover_gw_logic(NvmeGwMonStates& gws_states, + NvmeLocation& location, NvmeGwId& min_loaded_gw_id); + bool validate_number_locations(int num_gws, int num_locations); + void check_relocate_ana_groups(const NvmeGroupKey& group_key, + bool &propose); + int relocate_ana_grp(const NvmeGwId &src_gw_id, + const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, + NvmeLocation& location, bool &propose); public: int blocklist_gw( @@ -156,7 +174,10 @@ public: using ceph::encode; uint8_t version = 1; if (HAVE_FEATURE(features, NVMEOFHAMAP)) { - version = 2; + version = 2; + } + if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { + version = 3; } ENCODE_START(version, version, bl); encode(epoch, bl);// global map epoch @@ -166,12 +187,15 @@ public: if (version >= 2) { encode(gw_epoch, bl); } + if (version >=3) { + encode(failbacks_in_progress, bl); + } ENCODE_FINISH(bl); } void decode(ceph::buffer::list::const_iterator &bl) { using ceph::decode; - DECODE_START(2, bl); + DECODE_START(3, bl); decode(epoch, bl); decode(created_gws, bl); @@ -179,6 +203,9 @@ public: if (struct_v >= 2) { decode(gw_epoch, bl); } + if (struct_v >=3) { + decode(failbacks_in_progress, bl); + } DECODE_FINISH(bl); } diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index b4c5113ff37..63a4f5a9eb2 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -510,6 +510,8 @@ bool NVMeofGwMon::preprocess_command(MonOpRequestRef op) f->open_object_section("stat"); f->dump_string("gw-id", gw_id); f->dump_unsigned("anagrp-id",state.ana_grp_id+1); + f->dump_string("location", state.location); + f->dump_unsigned("admin state", (uint32_t)state.gw_admin_state); f->dump_unsigned("num-namespaces", num_ns[state.ana_grp_id+1]); f->dump_unsigned("performed-full-startup", state.performed_full_startup); std::stringstream sstrm1; @@ -669,18 +671,91 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) response = true; } } + else if (prefix == "nvme-gw enable" || prefix == "nvme-gw disable") { + std::string id, pool, group; + cmd_getval(cmdmap, "id", id); + cmd_getval(cmdmap, "pool", pool); + cmd_getval(cmdmap, "group", group); + auto group_key = std::make_pair(pool, group); + dout(10) << " id "<< id <<" pool "<< pool << " group "<< group + << " " << prefix << dendl; + gw_admin_state_t set = (prefix == "nvme-gw enable") ? + gw_admin_state_t::GW_ADMIN_ENABLED : + gw_admin_state_t::GW_ADMIN_DISABLED; + bool propose = false; + rc = pending_map.cfg_admin_state_change(id, group_key, set, propose); + if (rc == -EINVAL) { + err = rc; + dout (4) << "Error: GW cannot be set to admin state " << id + << " " << pool << " " << group << " rc " << rc << dendl; + sstrm.str(""); + } + // propose pending would be generated by the PaxosService + if (rc == 0 && propose == true) { + response = true; + } + } else if (prefix == "nvme-gw set-location") { + + std::string id, pool, group, location; + cmd_getval(cmdmap, "id", id); + cmd_getval(cmdmap, "pool", pool); + cmd_getval(cmdmap, "group", group); + cmd_getval(cmdmap, "location", location); + auto group_key = std::make_pair(pool, group); + dout(10) << " id "<< id <<" pool "<< pool << " group "<< group + <<" location "<< location << dendl; + bool propose = false; + rc = pending_map.cfg_set_location(id, group_key, location, propose); + if (rc == -EINVAL || rc == -EEXIST) { + err = rc; + dout (4) << "Error: GW cannot set location " << id + << " " << pool << " " << group << " rc " << rc << dendl; + sstrm.str(""); + if (rc == -EEXIST) { + sstrm.str("old location removed but exist namespaces assigned to it"); + } + } + // propose pending would be generated by the PaxosService + if (rc == 0 && propose == true) { + response = true; + } + } else if (prefix == "nvme-gw start-failback") { + std::string id, pool, group, location; + bool propose = false; + cmd_getval(cmdmap, "pool", pool); + cmd_getval(cmdmap, "group", group); + cmd_getval(cmdmap, "location", location); + auto group_key = std::make_pair(pool, group); + dout(10) << id <<" pool "<< pool << " group "<< group + <<" location "<< location << dendl; + rc = pending_map.cfg_start_inter_location_failback(group_key, + location, propose); + if (rc == -EINVAL || rc == -EEXIST) { + err = rc; + sstrm.str(""); + if (rc == -EEXIST) { + sstrm.str("command already set please wait until completed"); + } + if (rc == EINVAL) { + sstrm.str("command cannot be executed"); + } + } + if (rc == 0 && propose == true) { + response = true; + } + } getline(sstrm, rs); if (response == false) { if (err < 0 && rs.length() == 0) { rs = cpp_strerror(err); dout(10) << "Error command err : "<< err << " rs-len: " - << rs.length() << dendl; + << rs.length() << dendl; } mon.reply_command(op, err, rs, rdata, get_last_committed()); } else { wait_for_commit(op, new Monitor::C_Command(mon, op, 0, rs, - get_last_committed() + 1)); + get_last_committed() + 1)); } return response; } @@ -850,6 +925,9 @@ int NVMeofGwMon::apply_beacon(const NvmeGwId &gw_id, int gw_version, if (changed) { avail = gw_availability_t::GW_AVAILABLE; } + if (state.gw_admin_state ==gw_admin_state_t::GW_ADMIN_DISABLED) { + avail = gw_availability_t::GW_CREATED; + } if (gw_subs.size() == 0) { avail = gw_availability_t::GW_CREATED; dout(10) << "No-subsystems condition detected for GW " << gw_id < GW epoch: " << group_gws.second << " }"; } + for (auto& group_gws: value.failbacks_in_progress) { + os << "\n" << MODULE_PREFFIX << "{ " << group_gws.first + << " } -> failback-to: " << group_gws.second << " }"; + } for (auto& group_gws: value.created_gws) { os << "\n" << MODULE_PREFFIX << "{ " << group_gws.first << " } -> { " << group_gws.second << " }"; @@ -335,10 +339,12 @@ inline void decode( decode(state.gw_map_epoch, bl); decode(state.subsystems, bl); uint32_t avail; + uint64_t last_beacon_seq_number; decode(avail, bl); state.availability = (gw_availability_t)avail; if (struct_v >= 2) { - decode(state.last_beacon_seq_number, bl); + decode(last_beacon_seq_number, bl); + state.last_beacon_seq_number = last_beacon_seq_number; decode(state.last_beacon_seq_ooo, bl); } DECODE_FINISH(bl); @@ -474,6 +480,9 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, if (HAVE_FEATURE(features, NVMEOFHAMAP)) { version = 3; } + if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { + version = 4; + } ENCODE_START(version, version, bl); dout(20) << "encode NvmeGwMonStates. struct_v: " << (int)version << dendl; encode ((uint32_t)gws.size(), bl); // number of gws in the group @@ -526,6 +535,11 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, gw.second.addr_vect.encode(bl, features); encode(gw.second.beacon_index, bl); } + if (version >= 4) { + encode((int)gw.second.gw_admin_state, bl); + dout(10) << "encode location " << gw.second.location << dendl; + encode(gw.second.location, bl); + } } ENCODE_FINISH(bl); } @@ -534,7 +548,7 @@ inline void decode( NvmeGwMonStates& gws, ceph::buffer::list::const_iterator &bl) { gws.clear(); uint32_t num_created_gws; - DECODE_START(3, bl); + DECODE_START(4, bl); dout(20) << "decode NvmeGwMonStates. struct_v: " << struct_v << dendl; decode(num_created_gws, bl); dout(20) << "decode NvmeGwMonStates. num gws " << num_created_gws << dendl; @@ -613,6 +627,14 @@ inline void decode( decode(gw_created.beacon_index, bl); dout(20) << "decoded beacon_index " << gw_created.beacon_index << dendl; } + if (struct_v >= 4) { + dout(20) << "decode admin state and location" << dendl; + int admin_state; + decode(admin_state, bl); + gw_created.gw_admin_state = (gw_admin_state_t)admin_state; + decode(gw_created.location, bl); + dout(20) << "decoded location " << gw_created.location << dendl; + } gws[gw_name] = gw_created; } @@ -661,6 +683,39 @@ inline void decode(std::map& gw_epoch, DECODE_FINISH(bl); } +inline void encode( + const std::map &failbacks_in_progress, + ceph::bufferlist &bl) { + ENCODE_START(1, 1, bl); + encode ((uint32_t)failbacks_in_progress.size(), bl); // number of groups + for (auto& group_failbacks: failbacks_in_progress) { + auto& group_key = group_failbacks.first; + encode(group_key.first, bl); // pool + encode(group_key.second, bl); // group + encode(group_failbacks.second, bl); + } + ENCODE_FINISH(bl); +} + +inline void decode( + std::map &failbacks_in_progress, + ceph::buffer::list::const_iterator &bl) { + failbacks_in_progress.clear(); + uint32_t ngroups; + DECODE_START(1, bl); + decode(ngroups, bl); + for(uint32_t i = 0; i& created_gws, ceph::bufferlist &bl, uint64_t features) { diff --git a/src/mon/NVMeofGwTypes.h b/src/mon/NVMeofGwTypes.h index cd22dcbc4fe..940107f2006 100755 --- a/src/mon/NVMeofGwTypes.h +++ b/src/mon/NVMeofGwTypes.h @@ -26,6 +26,8 @@ #include "msg/msg_types.h" using NvmeGwId = std::string; +using FailbackLocation = std::string; +using NvmeLocation = std::string; using NvmeGroupKey = std::pair; using NvmeNqnId = std::string; using NvmeAnaGrpId = uint32_t; @@ -53,6 +55,11 @@ enum class gw_availability_t { GW_DELETED }; +enum class gw_admin_state_t { + GW_ADMIN_ENABLED = 0, + GW_ADMIN_DISABLED, +}; + enum class subsystem_change_t { SUBSYSTEM_ADDED, SUBSYSTEM_CHANGED, @@ -167,6 +174,8 @@ struct NvmeGwMonState { * it from being overriden by new epochs in monitor's function create_pending - * function restore_pending_map_info is called for this purpose */ + gw_admin_state_t gw_admin_state = gw_admin_state_t::GW_ADMIN_ENABLED; + std::string location = ""; std::chrono::system_clock::time_point allow_failovers_ts = std::chrono::system_clock::now(); std::chrono::system_clock::time_point last_gw_down_ts = -- 2.47.3