From: Leonid Chernin Date: Tue, 17 Mar 2026 15:40:16 +0000 (+0200) Subject: nvmeofgw: propagate quorum feature to the NVMeofMonClient, X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f3f8bde10c86e0c5d4f5286f8e249fc26fa3605d;p=ceph.git nvmeofgw: propagate quorum feature to the NVMeofMonClient, reverted feature bit NVMEOF_BEACON_DIFF: -NVMeofGwMon adds a quorum_features indication to the MonClient map. -MonClient initially sends beacons without applying the BEACON_DIFF logic. -MonClient begins applying the BEACON_DIFF logic only when the BEACON_DIFF bit is set in the quorum_features field of the NVMeoF monitor map. -added mon commands: nvme-gw set beacon-diff disable nvme-gw set beacon-diff enable -performed changes in encode/decode of the BEACON_DIFF feature -reverted NVMEOF_BEACON_DIFF bit Signed-off-by: Leonid Chernin --- diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index ca02b485f69c..5960b9f24aa6 100644 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -160,7 +160,8 @@ DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_PROXY_FEATURES, JEWEL, LUMINOUS) // overl DEFINE_CEPH_FEATURE(49, 2, SERVER_SQUID); DEFINE_CEPH_FEATURE_RETIRED(50, 1, MON_METADATA, MIMIC, OCTOPUS) DEFINE_CEPH_FEATURE(50, 2, SERVER_TENTACLE); -DEFINE_CEPH_FEATURE(51, 2, NVMEOF_BEACON_DIFF) +DEFINE_CEPH_FEATURE_RETIRED(51, 1, OSD_BITWISE_HOBJ_SORT, MIMIC, OCTOPUS) +// available DEFINE_CEPH_FEATURE_RETIRED(52, 1, OSD_PROXY_WRITE_FEATURES, MIMIC, OCTOPUS) DEFINE_CEPH_FEATURE(52, 2, SERVER_UMBRELLA); DEFINE_CEPH_FEATURE_RETIRED(53, 1, ERASURE_CODE_PLUGINS_V3, MIMIC, OCTOPUS) @@ -256,7 +257,6 @@ DEFINE_CEPH_FEATURE_RETIRED(63, 1, RESERVED_BROKEN, LUMINOUS, QUINCY) // client- CEPH_FEATUREMASK_SERVER_REEF | \ CEPH_FEATUREMASK_SERVER_SQUID | \ CEPH_FEATUREMASK_SERVER_TENTACLE | \ - CEPH_FEATUREMASK_NVMEOF_BEACON_DIFF | \ CEPH_FEATUREMASK_SERVER_UMBRELLA | \ 0ULL) diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index 239410f49da5..4d59a734f4ea 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -60,8 +60,7 @@ public: features ? BEACON_VERSION_ENHANCED : BEACON_VERSION_LEGACY}, gw_id(gw_id_), gw_pool(gw_pool_), gw_group(gw_group_), subsystems(subsystems_), availability(availability_), last_osd_epoch(last_osd_epoch_), - last_gwmap_epoch(last_gwmap_epoch_), sequence(sequence_), - affected_features(features) + last_gwmap_epoch(last_gwmap_epoch_), sequence(sequence_) { set_priority(CEPH_MSG_PRIO_HIGH); } @@ -100,13 +99,14 @@ public: encode(gw_id, payload); encode(gw_pool, payload); encode(gw_group, payload); - encode(subsystems, payload, affected_features); + encode(subsystems, payload); encode((uint32_t)availability, payload); encode(last_osd_epoch, payload); encode(last_gwmap_epoch, payload); // Only encode sequence for enhanced beacons (version >= 2) if (get_header().version >= 2) { encode(sequence, payload); + encode_beacon_change_descriptors(subsystems, payload); } } @@ -127,6 +127,7 @@ public: // Only decode sequence for enhanced beacons (version >= 2) if (get_header().version >= 2 && !p.end()) { decode(sequence, p); + decode_beacon_change_descriptors(subsystems, p); } else { sequence = 0; // Legacy beacons don't have sequence field } diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index ed1523f652f6..03d1d8699f7c 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1495,6 +1495,11 @@ COMMAND("nvme-gw disaster-clear" " set location to clear Disaster state - failbacks allowed for recovered location", "mgr", "rw") +COMMAND("nvme-gw set" + " name=var,type=CephChoices,strings=beacon-diff" + " name=val,type=CephString ", + "config nvme-gw", + "mgr", "rw") // these are tell commands that were implemented as CLI commands in // the broken pre-octopus way that we want to allow to work when a diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 8224d15f49d7..c4a5d7211aa7 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -2648,7 +2648,6 @@ void Monitor::apply_monmap_to_compatset_features() ceph_assert(ceph::features::mon::get_persistent().contains_all( ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)); // this feature should only ever be set if the quorum supports it. - ceph_assert(HAVE_FEATURE(quorum_con_features, NVMEOF_BEACON_DIFF)); new_features.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_NVMEOF_BEACON_DIFF); } @@ -2718,12 +2717,6 @@ void Monitor::calc_quorum_requirements() CEPH_FEATUREMASK_CEPHX_V2; } - // Release-independent features - if (monmap->get_required_features().contains_all( - ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { - required_features |= CEPH_FEATUREMASK_NVMEOF_BEACON_DIFF; - } - dout(10) << __func__ << " required_features " << required_features << dendl; } diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index f37083ecd4e6..6cf45bcd7bfd 100644 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -19,6 +19,7 @@ #include "Monitor.h" #include "OSDMonitor.h" #include "mon/health_check.h" +#include "messages/MNVMeofGwBeacon.h" using std::list; using std::map; @@ -50,7 +51,7 @@ void NVMeofGwMap::to_gmap( auto gw_state = NvmeGwClientState( gw_created.ana_grp_id, epoch, availability, gw_created.beacon_sequence, - gw_created.beacon_sequence_ooo); + gw_created.beacon_sequence_ooo, published_features); for (const auto& sub: gw_created.subsystems) { gw_state.subsystems.insert({ sub.nqn, @@ -306,7 +307,7 @@ int NVMeofGwMap::cfg_set_location(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, std::string &location, bool &propose_pending) { - if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { dout(4) << "Command is not allowed - feature is not installed" << group_key << " " << gw_id << dendl; return -EINVAL; @@ -415,10 +416,10 @@ int NVMeofGwMap::cfg_location_disaster_set( const NvmeGroupKey& group_key, std::string &location, bool &propose_pending) { - if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { dout(4) << "Command is not allowed - feature is not installed" << group_key << dendl; - return -EINVAL; + return -EOPNOTSUPP; } bool cleanup_in_process = false; bool location_exists = false; @@ -457,10 +458,10 @@ int NVMeofGwMap::cfg_location_disaster_clear( const NvmeGroupKey& group_key, std::string &location, bool &propose_pending) { - if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { dout(4) << "Command is not allowed - feature is not installed" << group_key << dendl; - return -EINVAL; + return -EOPNOTSUPP; } auto& gws_states = created_gws[group_key]; bool accept = false; @@ -502,6 +503,30 @@ int NVMeofGwMap::cfg_location_disaster_clear( } } +int NVMeofGwMap::cfg_enable_disable_beacon_diff(bool enable, + bool &propose_pending) +{ + int rc = 0; + if (enable && ((published_features & FLAG_BEACONDIFF) == 0)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { + dout(1) << "beacon-diff not supported by a quorum of monitors" << dendl; + rc = -EOPNOTSUPP; + } else { + published_features ^= FLAG_BEACONDIFF; + dout (10) << "enabled beacon-diff" << dendl; + propose_pending = true; + } + } else if (!enable && (published_features & FLAG_BEACONDIFF)) { + published_features ^= FLAG_BEACONDIFF; + dout (10) << "disabled beacon-diff" << dendl; + propose_pending = true; + } else { + dout (10) << "no change" << dendl; + /* allow idempotency */ + } + return rc; +} + void NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending) { @@ -782,7 +807,7 @@ void NVMeofGwMap::check_relocate_ana_groups(const NvmeGroupKey& group_key, * for ana-grp in list make relocation. * if all ana-grps in location active remove location from the map disaster_locations.group */ - if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { dout(4) << "relocate is not allowed - feature is not installed" << group_key << dendl; return ; @@ -1590,9 +1615,7 @@ bool NVMeofGwMap::put_gw_beacon_sequence_number(const NvmeGwId &gw_id, { bool rc = true; NvmeGwMonState& gw_map = created_gws[group_key][gw_id]; - - if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF) || - (gw_version > 0) ) { + if (gw_version > BEACON_VERSION_LEGACY) { uint64_t seq_number = gw_map.beacon_sequence; if ((beacon_sequence != seq_number+1) && !(beacon_sequence == 0 && seq_number == 0 )) {// new GW startup @@ -1613,8 +1636,7 @@ bool NVMeofGwMap::set_gw_beacon_sequence_number(const NvmeGwId &gw_id, int gw_version, const NvmeGroupKey& group_key, uint64_t beacon_sequence) { NvmeGwMonState& gw_map = created_gws[group_key][gw_id]; - if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF) || - (gw_version > 0)) { + if (gw_version > BEACON_VERSION_LEGACY) { gw_map.beacon_sequence = beacon_sequence; gw_map.beacon_sequence_ooo = false; dout(10) << gw_id << " set beacon_sequence " << beacon_sequence << dendl; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 4fbb26a3c60c..60d44796f252 100644 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -35,6 +35,14 @@ using ceph::coarse_mono_clock; #define dout_prefix *_dout << MODULE_PREFFIX << __PRETTY_FUNCTION__ << " " class health_check_map_t; +inline void encode_gws_beacon_diff_additions( + const std::map& created_gws, + ceph::bufferlist &bl, uint64_t features); + +inline void decode_gws_beacon_diff_additions( + std::map& created_gws, + ceph::buffer::list::const_iterator &bl); + class Monitor; /*-------------------*/ class NVMeofGwMap @@ -45,6 +53,9 @@ public: // epoch is for Paxos synchronization mechanizm epoch_t epoch = 0; bool delay_propose = false; + uint64_t published_features = 0; + static constexpr uint64_t FLAG_BEACONDIFF = (1<<0); + uint64_t ever_enabled_features = 0; std::map created_gws; @@ -94,6 +105,7 @@ public: std::string &location, bool &propose_pending); int cfg_location_disaster_clear(const NvmeGroupKey& group_key, std::string &location, bool &propose_pending); + int cfg_enable_disable_beacon_diff(bool enable, bool &propose_pending); void process_gw_map_ka( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, epoch_t& last_osd_epoch, bool &propose_pending); @@ -193,24 +205,18 @@ public: void encode(ceph::buffer::list &bl, uint64_t features) const { using ceph::encode; - uint8_t version = 1; - if (HAVE_FEATURE(features, NVMEOFHAMAP)) { - version = 2; - } - if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { - version = 3; - } - ENCODE_START(version, version, bl); + static const uint8_t version = 3; + static const uint8_t cversion = 1; + ENCODE_START(version, cversion, bl); encode(epoch, bl);// global map epoch encode(created_gws, bl, features); //Encode created GWs encode(fsm_timers, bl, features); - if (version >= 2) { - encode(gw_epoch, bl); - } - if (version >=3) { - encode(disaster_locations, bl); - } + encode(gw_epoch, bl); + encode_gws_beacon_diff_additions(created_gws, bl, features); + encode(disaster_locations, bl); + encode(ever_enabled_features, bl); + encode(published_features, bl); ENCODE_FINISH(bl); } @@ -229,8 +235,11 @@ public: dout(20) << "decode gw epoch " << dendl; } if (struct_v >=3) { + decode_gws_beacon_diff_additions(created_gws, bl); decode(disaster_locations, bl); - dout(20) << "decode disaster location " << dendl; + decode(ever_enabled_features, bl); + decode(published_features, bl); + dout(20) << " decoded features " << published_features << dendl; } DECODE_FINISH(bl); } diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 7efc4ae7e66c..5cd9089caba5 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -160,7 +160,19 @@ void NVMeofGwMon::tick() pending_map.track_deleting_gws(group_key, *subsystems, propose); _propose_pending |= propose; } - // Periodic: take care of not handled ANA groups + + if (mon.get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { + /* only automatically upgrade once: */ + if ((pending_map.ever_enabled_features & NVMeofGwMap::FLAG_BEACONDIFF) == 0) { + pending_map.ever_enabled_features |= NVMeofGwMap::FLAG_BEACONDIFF; + pending_map.published_features |= NVMeofGwMap::FLAG_BEACONDIFF; + dout(4) << " Updating map to enable beacon-diff, features " + << pending_map.published_features << dendl; + _propose_pending = true; + } + } + + // Periodically: take care of not handled ANA groups pending_map.handle_abandoned_ana_groups(propose); _propose_pending |= propose; @@ -255,7 +267,8 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t) pending_map.encode(bl, features); dout(10) << " has NVMEOFHA: " << HAVE_FEATURE(features, NVMEOFHA) << " has NVMEOFHAMAP: " << HAVE_FEATURE(features, NVMEOFHAMAP) - << " has BEACON_DIFF: " << HAVE_FEATURE(features, NVMEOF_BEACON_DIFF) << dendl; + << " has BEACON_DIFF: " + << mon.get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF) << dendl; put_version(t, pending_map.epoch, bl); put_last_committed(t, pending_map.epoch); @@ -268,8 +281,8 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t) void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) { version_t version = get_last_committed(); - uint64_t features = mon.get_quorum_con_features(); - dout(20) << " has BEACON_DIFF: " << HAVE_FEATURE(features, NVMEOF_BEACON_DIFF) + dout(20) << " has BEACON_DIFF: " + << mon.get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF) << dendl; if (version != map.epoch) { dout(10) << " NVMeGW loading version " << version @@ -347,6 +360,7 @@ void NVMeofGwMon::check_sub(Subscription *sub) = map.created_gws[group_key][gw_id]; // respond with a map slice correspondent to the same GW unicast_map.epoch = map.gw_epoch[group_key];//map.epoch; + unicast_map.published_features = map.published_features; sub->session->con->send_message2(make_message(unicast_map)); if (sub->onetime) { mon.session_map.remove_sub(sub); @@ -462,6 +476,8 @@ bool NVMeofGwMon::preprocess_command(MonOpRequestRef op) f->dump_unsigned("epoch", map.epoch); f->dump_string("pool", pool); f->dump_string("group", group); + f->dump_bool("beacon_diff_enabled", + map.published_features & NVMeofGwMap::FLAG_BEACONDIFF); if (HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA)) { f->dump_string("features", "LB"); if (map.created_gws[group_key].size()) { @@ -751,14 +767,15 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) <<" location "<< location << dendl; rc = pending_map.cfg_location_disaster_set(group_key, location, propose); - if (rc == -EINVAL || rc == -EEXIST) { + if (rc == -EINVAL || rc == -EEXIST || rc == -EOPNOTSUPP) { err = rc; sstrm.str(""); if (rc == -EEXIST) { sstrm.str("command already set please wait until completed"); - } - if (rc == -EINVAL) { + } else if (rc == -EINVAL) { sstrm.str("command cannot be executed"); + } else if (rc == -EOPNOTSUPP) { + sstrm.str("command not supported"); } } if (rc == 0 && propose == true) { @@ -775,19 +792,49 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) <<" location "<< location << dendl; rc = pending_map.cfg_location_disaster_clear(group_key, location, propose); - if (rc == -EINVAL || rc == -EEXIST) { + if (rc == -EINVAL || rc == -EEXIST || rc == -EOPNOTSUPP) { err = rc; sstrm.str(""); if (rc == -EEXIST) { sstrm.str("command already set please wait until completed"); - } - if (rc == -EINVAL) { + } else if (rc == -EINVAL) { sstrm.str("command cannot be executed"); + } else if (rc == -EOPNOTSUPP) { + sstrm.str("command not supported"); } } if (rc == 0 && propose == true) { response = true; } + } else if (prefix == "nvme-gw set") { + std::string choice, value; + cmd_getval(cmdmap, "var", choice); + cmd_getval(cmdmap, "val", value); + if (choice == "beacon-diff") { + bool propose = false; + dout(10) << "Command "<< prefix << " " << choice + << " " << value << dendl; + if (value != "enable" && value != "disable") { + rc = -EINVAL; + sstrm.str("command not permitted - illegal value"); + } else { + bool command = (value == "enable") ? true: false; + rc = pending_map.cfg_enable_disable_beacon_diff(command, propose); + if (rc == -EOPNOTSUPP) { + err = rc; + sstrm.str(""); + if (rc == -EOPNOTSUPP) { + sstrm.str("command not supported"); + } + } + if (rc == 0 && propose == true) { + response = true; + } + } + } else { + rc = -EPERM; + sstrm.str("command not permitted - illegal choice"); + } } getline(sstrm, rs); if (response == false) { @@ -863,6 +910,7 @@ void NVMeofGwMon::do_send_map_ack(MonOpRequestRef op, pending_gw_map : map.created_gws[group_key][gw_id]; ack_map.created_gws[group_key][gw_id].beacon_sequence = pending_gw_map.beacon_sequence; + ack_map.published_features = pending_map.published_features; if (!is_correct_sequence) { dout(4) << " GW " << gw_id << " sending ACK due to receiving beacon_sequence out of order" << dendl; @@ -879,7 +927,8 @@ void NVMeofGwMon::do_send_map_ack(MonOpRequestRef op, if (!gw_created) dout(10) << "gw not created, ack map " << ack_map << " epoch " << ack_map.epoch << dendl; - dout(20) << "ack_map " << ack_map <(ack_map); mon.send_reply(op, msg.detach()); } @@ -898,7 +947,7 @@ void NVMeofGwMon::do_send_map_ack(MonOpRequestRef op, * if descriptor is DELETED do the following * look for subs nqn in the gw.subs list and if found - delete */ -int NVMeofGwMon::apply_beacon(const NvmeGwId &gw_id, int gw_version, +int NVMeofGwMon::apply_beacon(const NvmeGwId &gw_id, int affected_version, const NvmeGroupKey& group_key, void *msg, const BeaconSubsystems& sub, gw_availability_t &avail, bool &propose_pending) @@ -909,8 +958,7 @@ int NVMeofGwMon::apply_beacon(const NvmeGwId &gw_id, int gw_version, pending_map.created_gws[group_key][gw_id].subsystems; auto &state = pending_map.created_gws[group_key][gw_id]; - if (!HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOF_BEACON_DIFF) && - gw_version == 0) { + if (affected_version == BEACON_VERSION_LEGACY) { if (gw_subs != sub) { dout(10) << "BEACON_DIFF logic not applied." "subsystems of GW changed, propose pending " << gw_id << dendl; @@ -920,6 +968,12 @@ int NVMeofGwMon::apply_beacon(const NvmeGwId &gw_id, int gw_version, changed = true; } } else { + if (!state.nonce_map.empty()) { + dout(4) << "Erase nonce map when Beacon-diff feature enabled for GW " + << gw_id << dendl; + state.nonce_map.clear();// no need anymore,used just in compatibility mode + propose_pending = true; + } if (state.beacon_sequence_ooo) { dout(10) << "Good sequence after out of order detection " "sequence "<< state.beacon_sequence << " " << gw_id << dendl; @@ -999,8 +1053,10 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) auto m = op->get_req(); uint64_t sequence = m->get_sequence(); int version = m->version; + uint16_t header_ver = m->get_header().version; dout(10) << "availability " << m->get_availability() - << " sequence " << sequence << " version " << version + << " sequence " << sequence << " version " << version + << " header version " << header_ver << " GW : " << m->get_gw_id() << " osdmap_epoch " << m->get_last_osd_epoch() << " subsystems " << m->get_subsystems().size() << dendl; @@ -1053,7 +1109,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) goto false_return; // not sending ack to this beacon } pending_map.created_gws[group_key][gw_id].subsystems.clear(); - pending_map.set_gw_beacon_sequence_number(gw_id, version, + pending_map.set_gw_beacon_sequence_number(gw_id, header_ver, group_key, sequence); dout(4) << "GW beacon: Created state - full startup done " << gw_id << " GW state in monitor data-base : " @@ -1084,7 +1140,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) // if GW reports Avail/Unavail but in monitor's database it is Unavailable if (gw_exists) { correct_sequence = pending_map.put_gw_beacon_sequence_number - (gw_id, version, group_key, sequence, stored_sequence); + (gw_id, header_ver, group_key, sequence, stored_sequence); // it means it did not perform "exit" after failover was set by // NVMeofGWMon if ((pending_map.created_gws[group_key][gw_id].availability == @@ -1139,7 +1195,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr()); gw_propose = true; } - if (apply_beacon(gw_id, version, group_key, (void *)m, sub, avail, propose) !=0) { + if (apply_beacon(gw_id, header_ver, group_key, (void *)m, sub, avail, propose) !=0) { nonce_propose = true; dout(10) << "subsystem(subs/listener/nonce/NM) of GW changed, propose pending " << gw_id << " available " << avail << dendl; diff --git a/src/mon/NVMeofGwSerialize.h b/src/mon/NVMeofGwSerialize.h index 89b9bc5880e9..9e4f9a1b21f5 100644 --- a/src/mon/NVMeofGwSerialize.h +++ b/src/mon/NVMeofGwSerialize.h @@ -145,6 +145,7 @@ inline std::ostream& operator<<( << " availablilty " << value.availability << " sequence " << value.last_beacon_seq_number << " sequence-ooo " << value.last_beacon_seq_ooo + << " map_features " << value.map_features << " GwSubsystems: [ "; for (const auto& sub: value.subsystems) { os << sub.second << " "; @@ -258,7 +259,7 @@ inline std::ostream& operator<<(std::ostream& os, const LocationStates value) { inline std::ostream& operator<<(std::ostream& os, const NVMeofGwMap value) { os << "\n" << MODULE_PREFFIX << "== NVMeofGwMap [ Created_gws: epoch " - << value.epoch; + << value.epoch << " features " << value.published_features; for (auto& group_gws: value.gw_epoch) { os << "\n" << MODULE_PREFFIX << "{ " << group_gws.first << " } -> GW epoch: " << group_gws.second << " }"; @@ -274,6 +275,10 @@ inline std::ostream& operator<<(std::ostream& os, const NVMeofGwMap value) { return os; } +inline void encode_beacon_change_descriptors(const BeaconSubsystems& sub, ceph::bufferlist &bl); +inline void decode_beacon_change_descriptors(BeaconSubsystems& subs, ceph::buffer::list::const_iterator &bl); + + inline void encode(const ana_state_t& st, ceph::bufferlist &bl) { ENCODE_START(1, 1, bl); encode((uint32_t)st.size(), bl); @@ -344,19 +349,16 @@ inline void decode( } inline void encode(const NvmeGwClientState& state, ceph::bufferlist &bl, uint64_t features) { - uint8_t version = 1; - if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { - version = 2; - } - ENCODE_START(version, version, bl); + static const uint8_t version = 2; + static const uint8_t cversion = 1; + ENCODE_START(version, cversion, bl); encode(state.group_id, bl); encode(state.gw_map_epoch, bl); encode (state.subsystems, bl, features); encode((uint32_t)state.availability, bl); - if (version >= 2) { - encode((uint64_t)state.last_beacon_seq_number, bl); - encode(state.last_beacon_seq_ooo, bl); - } + encode((uint64_t)state.last_beacon_seq_number, bl); + encode((uint8_t)state.last_beacon_seq_ooo, bl); + encode((uint64_t)state.map_features, bl); ENCODE_FINISH(bl); } @@ -373,7 +375,10 @@ inline void decode( if (struct_v >= 2) { decode(last_beacon_seq_number, bl); state.last_beacon_seq_number = last_beacon_seq_number; - decode(state.last_beacon_seq_ooo, bl); + uint8_t last_beacon_seq_ooo; + decode(last_beacon_seq_ooo, bl); + state.last_beacon_seq_ooo = (bool)last_beacon_seq_ooo; + decode(state.map_features, bl); } DECODE_FINISH(bl); } @@ -508,9 +513,6 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, if (HAVE_FEATURE(features, NVMEOFHAMAP)) { version = 3; } - if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { - version = 4; - } ENCODE_START(version, version, bl); dout(20) << "encode NvmeGwMonStates. struct_v: " << (int)version << dendl; encode ((uint32_t)gws.size(), bl); // number of gws in the group @@ -529,7 +531,7 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, encode((uint16_t)gw.second.last_gw_map_epoch_valid, bl); dout(20) << "encode availability " << gw.second.availability << " startup " << (int)gw.second.performed_full_startup << dendl; - encode(gw.second.subsystems, bl, features); + encode(gw.second.subsystems, bl); encode((uint32_t)gw.second.blocklist_data.size(), bl); for (auto &blklst_itr: gw.second.blocklist_data) { @@ -546,7 +548,7 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, encode((uint32_t)gw.second.availability, bl); encode((uint16_t)gw.second.performed_full_startup, bl); encode((uint16_t)gw.second.last_gw_map_epoch_valid, bl); - encode(gw.second.subsystems, bl, features); + encode(gw.second.subsystems, bl); Blocklist_data bl_data[MAX_SUPPORTED_ANA_GROUPS]; for (auto &blklst_itr: gw.second.blocklist_data) { bl_data[blklst_itr.first].osd_epoch = blklst_itr.second.osd_epoch; @@ -563,11 +565,6 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, gw.second.addr_vect.encode(bl, features); encode(gw.second.beacon_index, bl); } - if (version >= 4) { - encode((int)gw.second.gw_admin_state, bl); - dout(10) << "encode location " << gw.second.location << dendl; - encode(gw.second.location, bl); - } } ENCODE_FINISH(bl); } @@ -655,15 +652,6 @@ inline void decode( decode(gw_created.beacon_index, bl); dout(20) << "decoded beacon_index " << gw_created.beacon_index << dendl; } - if (struct_v >= 4) { - dout(20) << "decode admin state and location" << dendl; - int admin_state; - decode(admin_state, bl); - gw_created.gw_admin_state = (gw_admin_state_t)admin_state; - decode(gw_created.location, bl); - dout(20) << "decoded location " << gw_created.location << dendl; - } - gws[gw_name] = gw_created; } if (struct_v == 1) { //Fix allocations of states and blocklist_data @@ -736,6 +724,41 @@ inline void encode( ENCODE_FINISH(bl); } +inline void encode_gws_beacon_diff_additions( + const std::map& created_gws, + ceph::bufferlist &bl, uint64_t features) { + ENCODE_START(1, 1, bl); + for (auto& group_gws: created_gws) { + auto& gws = group_gws.second; + for (auto& gw : gws) { + encode((uint8_t)gw.second.gw_admin_state, bl); + dout(10) << "encode location " << gw.second.location << " admin state " << (int)gw.second.gw_admin_state << dendl; + encode(gw.second.location, bl); + encode_beacon_change_descriptors(gw.second.subsystems, bl); + } + } + ENCODE_FINISH(bl); +} + +inline void decode_gws_beacon_diff_additions( + std::map& created_gws, + ceph::buffer::list::const_iterator &bl) { + DECODE_START(1, bl); + for (auto& group_gws: created_gws) { + auto& gws = group_gws.second; + for (auto& gw : gws) { + dout(20) << "decode admin state and location" << dendl; + uint8_t admin_state; + decode(admin_state, bl); + gw.second.gw_admin_state = (gw_admin_state_t)admin_state; + decode(gw.second.location, bl); + dout(20) << "decoded location " << gw.second.location << " admin state " << (int)gw.second.gw_admin_state << dendl; + decode_beacon_change_descriptors(gw.second.subsystems, bl); + } + } + DECODE_FINISH(bl); +} + inline void decode( std::map& created_gws, ceph::buffer::list::const_iterator &bl) { @@ -905,15 +928,12 @@ inline void decode(BeaconListener& ls, ceph::buffer::list::const_iterator &bl) { DECODE_FINISH(bl); } -inline void encode(const BeaconSubsystem& sub, ceph::bufferlist &bl, uint64_t features) { +inline void encode(const BeaconSubsystem& sub, ceph::bufferlist &bl) { uint8_t version = 1; - if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { - version = 2; - } ENCODE_START(version, version, bl); encode(sub.nqn, bl); - dout(20) << "encode BeaconSubsystems " << sub.nqn <<" features " << features + dout(20) << "encode BeaconSubsystems " << sub.nqn << " version " << (int)version << dendl; encode((uint32_t)sub.listeners.size(), bl); for (const auto& ls: sub.listeners) @@ -921,15 +941,33 @@ inline void encode(const BeaconSubsystem& sub, ceph::bufferlist &bl, uint64_t f encode((uint32_t)sub.namespaces.size(), bl); for (const auto& ns: sub.namespaces) encode(ns, bl); - if (version >= 2) { - encode((uint32_t)sub.change_descriptor, bl); - dout(20) << "encode BeaconSubsystems change-descr: " << (uint32_t)sub.change_descriptor << dendl; + ENCODE_FINISH(bl); +} + + + +inline void encode_beacon_change_descriptors(const BeaconSubsystems& subs, ceph::bufferlist &bl) { + ENCODE_START(1, 1, bl); + for (auto &sub_it:subs) { + encode((uint32_t)sub_it.change_descriptor, bl); + dout(20) << "encode change_descriptor " << (uint32_t)sub_it.change_descriptor << dendl; } ENCODE_FINISH(bl); } +inline void decode_beacon_change_descriptors(BeaconSubsystems& subs, ceph::buffer::list::const_iterator &bl) { + DECODE_START(1, bl); + for (auto &sub_it:subs) { + uint32_t change_desc; + decode(change_desc, bl); + sub_it.change_descriptor = static_cast(change_desc); + dout(20) << "decode change_descriptor " << change_desc << dendl; + } + DECODE_FINISH(bl); +} + inline void decode(BeaconSubsystem& sub, ceph::buffer::list::const_iterator &bl) { - DECODE_START(2, bl); + DECODE_START(1, bl); decode(sub.nqn, bl); dout(20) << "decode BeaconSubsystems " << sub.nqn << dendl; uint32_t s; @@ -949,12 +987,6 @@ inline void decode(BeaconSubsystem& sub, ceph::buffer::list::const_iterator &bl) decode(ns, bl); sub.namespaces.push_back(ns); } - if (struct_v >= 2) { - uint32_t change_desc; - decode(change_desc, bl); - sub.change_descriptor = static_cast(change_desc); - dout(20) << "decode BeaconSubsystems version >= " << 2 << dendl; - } DECODE_FINISH(bl); } diff --git a/src/mon/NVMeofGwTypes.h b/src/mon/NVMeofGwTypes.h index a2f2c559fc2e..01f9e2e33e78 100644 --- a/src/mon/NVMeofGwTypes.h +++ b/src/mon/NVMeofGwTypes.h @@ -260,14 +260,16 @@ struct NvmeGwClientState { gw_availability_t availability; uint64_t last_beacon_seq_number; bool last_beacon_seq_ooo; //out of order sequence + uint64_t map_features; // last map features NvmeGwClientState(NvmeAnaGrpId id, epoch_t epoch, gw_availability_t available, - uint64_t sequence, bool sequence_ooo) + uint64_t sequence, bool sequence_ooo, uint64_t last_published_features) : group_id(id), gw_map_epoch(epoch), availability(available), - last_beacon_seq_number(sequence), last_beacon_seq_ooo(sequence_ooo) {} + last_beacon_seq_number(sequence), last_beacon_seq_ooo(sequence_ooo), + map_features(last_published_features) {} NvmeGwClientState() : NvmeGwClientState( - REDUNDANT_GW_ANA_GROUP_ID, 0, gw_availability_t::GW_UNAVAILABLE, 0, 0) {} + REDUNDANT_GW_ANA_GROUP_ID, 0, gw_availability_t::GW_UNAVAILABLE, 0, 0, 0) {} }; struct Tmdata { diff --git a/src/nvmeof/NVMeofGwMonitorClient.cc b/src/nvmeof/NVMeofGwMonitorClient.cc index 3cd31a389696..b90f9d509905 100644 --- a/src/nvmeof/NVMeofGwMonitorClient.cc +++ b/src/nvmeof/NVMeofGwMonitorClient.cc @@ -269,7 +269,7 @@ void NVMeofGwMonitorClient::send_beacon() gwmap_epoch, beacon_sequence, // Pass affected features to the constructor - cluster_beacon_diff_included ? CEPH_FEATUREMASK_NVMEOF_BEACON_DIFF : 0 + cluster_beacon_diff_included ? 1 : 0 ); dout(10) << "sending beacon with diff support: " << (cluster_beacon_diff_included ? "enabled" : "disabled") << dendl; @@ -413,6 +413,13 @@ void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t nmap) // ensure that the gateway state has not vanished ceph_assert(got_new_gw_state || !got_old_gw_state); + uint64_t old_cluster_beacon_diff_included = cluster_beacon_diff_included; + cluster_beacon_diff_included = (new_gw_state.map_features & NVMeofGwMap::FLAG_BEACONDIFF) != 0; + if (old_cluster_beacon_diff_included != cluster_beacon_diff_included) { + dout(0) << fmt::format("Updated cluster features: 0x{:x}", cluster_beacon_diff_included) + << dendl; + } + // Check if the last_beacon_seq_number in the received map doesn't match our last sent beacon_sequence // beacon_sequence is incremented after sending, so we compare with (beacon_sequence - 1) { @@ -552,12 +559,6 @@ Dispatcher::dispatch_result_t NVMeofGwMonitorClient::ms_dispatch2(const ref_tget_type() << dendl; - // print connection features for all incoming messages and update cluster features - if (m->get_connection()) { - cluster_beacon_diff_included = monc.get_monmap_required_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF); - dout(10) << fmt::format("Updated cluster features: 0x{:x}", cluster_beacon_diff_included) << dendl; - } - if (m->get_type() == MSG_MNVMEOF_GW_MAP) { handle_nvmeof_gw_map(ref_cast(m)); return Dispatcher::HANDLED(); diff --git a/src/test/test_nvmeof_mon_encoding.cc b/src/test/test_nvmeof_mon_encoding.cc index 9d936b8021d8..50c36efcac4d 100644 --- a/src/test/test_nvmeof_mon_encoding.cc +++ b/src/test/test_nvmeof_mon_encoding.cc @@ -76,7 +76,7 @@ void test_MNVMeofGwMap() { std::string pool = "pool1"; std::string group = "grp1"; std::string gw_id = "GW1"; - NvmeGwClientState state(1, 32, gw_availability_t::GW_UNAVAILABLE, 0, false); + NvmeGwClientState state(1, 32, gw_availability_t::GW_UNAVAILABLE, 0, false, 0); std::string nqn = "nqn"; ana_state_t ana_state; NqnState nqn_state(nqn, ana_state); @@ -229,7 +229,7 @@ void test_subsystem_change_descriptors() { BeaconSubsystems subs = { sub1, sub2, sub3 }; // Encode and decode ceph::buffer::list bl; - encode(subs, bl, CEPH_FEATURES_ALL); + encode(subs, bl); auto p = bl.cbegin(); BeaconSubsystems decoded_subs; decode(decoded_subs, p);