From: Leonid Chernin Date: Tue, 17 Mar 2026 15:40:16 +0000 (+0200) Subject: nvmeofgw: propagate quorum feature to the NVMeofMonClient, X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9c03feade9fff097a3995dbabbd246b4265b60aa;p=ceph.git nvmeofgw: propagate quorum feature to the NVMeofMonClient, reverted feature bit NVMEOF_BEACON_DIFF: -NVMeofGwMon adds a quorum_features indication to the MonClient map. -MonClient initially sends beacons without applying the BEACON_DIFF logic. -MonClient begins applying the BEACON_DIFF logic only when the BEACON_DIFF bit is set in the quorum_features field of the NVMeoF monitor map. -added mon commands: nvme-gw set beacon-diff disable nvme-gw set beacon-diff enable -performed changes in encode/decode of the BEACON_DIFF feature -reverted NVMEOF_BEACON_DIFF bit Signed-off-by: Leonid Chernin (cherry picked from commit f3f8bde10c86e0c5d4f5286f8e249fc26fa3605d) Conflicts: ../src/include/ceph_features.h conflict because UMBRELLA feature was in main and should not be in tentacle --- diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index 5a607ef4b5a4..5d8b38fc0a1d 100644 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -161,7 +161,7 @@ DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_PROXY_FEATURES, JEWEL, LUMINOUS) // overl DEFINE_CEPH_FEATURE(49, 2, SERVER_SQUID); DEFINE_CEPH_FEATURE_RETIRED(50, 1, MON_METADATA, MIMIC, OCTOPUS) DEFINE_CEPH_FEATURE(50, 2, SERVER_TENTACLE); -DEFINE_CEPH_FEATURE(51, 2, NVMEOF_BEACON_DIFF) +DEFINE_CEPH_FEATURE_RETIRED(51, 1, OSD_BITWISE_HOBJ_SORT, MIMIC, OCTOPUS) // available DEFINE_CEPH_FEATURE_RETIRED(52, 1, OSD_PROXY_WRITE_FEATURES, MIMIC, OCTOPUS) // available @@ -258,7 +258,6 @@ DEFINE_CEPH_FEATURE_RETIRED(63, 1, RESERVED_BROKEN, LUMINOUS, QUINCY) // client- CEPH_FEATUREMASK_SERVER_REEF | \ CEPH_FEATUREMASK_SERVER_SQUID | \ CEPH_FEATUREMASK_SERVER_TENTACLE | \ - CEPH_FEATUREMASK_NVMEOF_BEACON_DIFF | \ 0ULL) #define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index b39c10576d26..4b61a8546a82 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -59,8 +59,7 @@ public: features ? BEACON_VERSION_ENHANCED : BEACON_VERSION_LEGACY}, gw_id(gw_id_), gw_pool(gw_pool_), gw_group(gw_group_), subsystems(subsystems_), availability(availability_), last_osd_epoch(last_osd_epoch_), - last_gwmap_epoch(last_gwmap_epoch_), sequence(sequence_), - affected_features(features) + last_gwmap_epoch(last_gwmap_epoch_), sequence(sequence_) { set_priority(CEPH_MSG_PRIO_HIGH); } @@ -99,13 +98,14 @@ public: encode(gw_id, payload); encode(gw_pool, payload); encode(gw_group, payload); - encode(subsystems, payload, affected_features); + encode(subsystems, payload); encode((uint32_t)availability, payload); encode(last_osd_epoch, payload); encode(last_gwmap_epoch, payload); // Only encode sequence for enhanced beacons (version >= 2) if (get_header().version >= 2) { encode(sequence, payload); + encode_beacon_change_descriptors(subsystems, payload); } } @@ -126,6 +126,7 @@ public: // Only decode sequence for enhanced beacons (version >= 2) if (get_header().version >= 2 && !p.end()) { decode(sequence, p); + decode_beacon_change_descriptors(subsystems, p); } else { sequence = 0; // Legacy beacons don't have sequence field } diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 21daba955f31..1f033ba9f84e 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1493,6 +1493,11 @@ COMMAND("nvme-gw disaster-clear" " set location to clear Disaster state - failbacks allowed for recovered location", "mgr", "rw") +COMMAND("nvme-gw set" + " name=var,type=CephChoices,strings=beacon-diff" + " name=val,type=CephString ", + "config nvme-gw", + "mgr", "rw") // these are tell commands that were implemented as CLI commands in // the broken pre-octopus way that we want to allow to work when a diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 69c1159453d6..d94f9dbad695 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -2581,7 +2581,6 @@ void Monitor::apply_monmap_to_compatset_features() ceph_assert(ceph::features::mon::get_persistent().contains_all( ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)); // this feature should only ever be set if the quorum supports it. - ceph_assert(HAVE_FEATURE(quorum_con_features, NVMEOF_BEACON_DIFF)); new_features.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_NVMEOF_BEACON_DIFF); } @@ -2648,12 +2647,6 @@ void Monitor::calc_quorum_requirements() CEPH_FEATUREMASK_CEPHX_V2; } - // Release-independent features - if (monmap->get_required_features().contains_all( - ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { - required_features |= CEPH_FEATUREMASK_NVMEOF_BEACON_DIFF; - } - dout(10) << __func__ << " required_features " << required_features << dendl; } diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index f5ca2aaa653a..b1eeada8fb01 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -17,6 +17,7 @@ #include "NVMeofGwMap.h" #include "OSDMonitor.h" #include "mon/health_check.h" +#include "messages/MNVMeofGwBeacon.h" using std::list; using std::map; @@ -48,7 +49,7 @@ void NVMeofGwMap::to_gmap( auto gw_state = NvmeGwClientState( gw_created.ana_grp_id, epoch, availability, gw_created.beacon_sequence, - gw_created.beacon_sequence_ooo); + gw_created.beacon_sequence_ooo, published_features); for (const auto& sub: gw_created.subsystems) { gw_state.subsystems.insert({ sub.nqn, @@ -304,7 +305,7 @@ int NVMeofGwMap::cfg_set_location(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, std::string &location, bool &propose_pending) { - if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { dout(4) << "Command is not allowed - feature is not installed" << group_key << " " << gw_id << dendl; return -EINVAL; @@ -413,10 +414,10 @@ int NVMeofGwMap::cfg_location_disaster_set( const NvmeGroupKey& group_key, std::string &location, bool &propose_pending) { - if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { dout(4) << "Command is not allowed - feature is not installed" << group_key << dendl; - return -EINVAL; + return -EOPNOTSUPP; } bool cleanup_in_process = false; bool location_exists = false; @@ -455,10 +456,10 @@ int NVMeofGwMap::cfg_location_disaster_clear( const NvmeGroupKey& group_key, std::string &location, bool &propose_pending) { - if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { dout(4) << "Command is not allowed - feature is not installed" << group_key << dendl; - return -EINVAL; + return -EOPNOTSUPP; } auto& gws_states = created_gws[group_key]; bool accept = false; @@ -500,6 +501,30 @@ int NVMeofGwMap::cfg_location_disaster_clear( } } +int NVMeofGwMap::cfg_enable_disable_beacon_diff(bool enable, + bool &propose_pending) +{ + int rc = 0; + if (enable && ((published_features & FLAG_BEACONDIFF) == 0)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { + dout(1) << "beacon-diff not supported by a quorum of monitors" << dendl; + rc = -EOPNOTSUPP; + } else { + published_features ^= FLAG_BEACONDIFF; + dout (10) << "enabled beacon-diff" << dendl; + propose_pending = true; + } + } else if (!enable && (published_features & FLAG_BEACONDIFF)) { + published_features ^= FLAG_BEACONDIFF; + dout (10) << "disabled beacon-diff" << dendl; + propose_pending = true; + } else { + dout (10) << "no change" << dendl; + /* allow idempotency */ + } + return rc; +} + void NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending) { @@ -780,7 +805,7 @@ void NVMeofGwMap::check_relocate_ana_groups(const NvmeGroupKey& group_key, * for ana-grp in list make relocation. * if all ana-grps in location active remove location from the map disaster_locations.group */ - if (!HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) { + if (!mon->get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { dout(4) << "relocate is not allowed - feature is not installed" << group_key << dendl; return ; @@ -1588,9 +1613,7 @@ bool NVMeofGwMap::put_gw_beacon_sequence_number(const NvmeGwId &gw_id, { bool rc = true; NvmeGwMonState& gw_map = created_gws[group_key][gw_id]; - - if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF) || - (gw_version > 0) ) { + if (gw_version > BEACON_VERSION_LEGACY) { uint64_t seq_number = gw_map.beacon_sequence; if ((beacon_sequence != seq_number+1) && !(beacon_sequence == 0 && seq_number == 0 )) {// new GW startup @@ -1611,8 +1634,7 @@ bool NVMeofGwMap::set_gw_beacon_sequence_number(const NvmeGwId &gw_id, int gw_version, const NvmeGroupKey& group_key, uint64_t beacon_sequence) { NvmeGwMonState& gw_map = created_gws[group_key][gw_id]; - if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF) || - (gw_version > 0)) { + if (gw_version > BEACON_VERSION_LEGACY) { gw_map.beacon_sequence = beacon_sequence; gw_map.beacon_sequence_ooo = false; dout(10) << gw_id << " set beacon_sequence " << beacon_sequence << dendl; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index f5af47d8ac53..14a7ea82a198 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -34,6 +34,14 @@ using ceph::coarse_mono_clock; #define dout_prefix *_dout << MODULE_PREFFIX << __PRETTY_FUNCTION__ << " " class health_check_map_t; +inline void encode_gws_beacon_diff_additions( + const std::map& created_gws, + ceph::bufferlist &bl, uint64_t features); + +inline void decode_gws_beacon_diff_additions( + std::map& created_gws, + ceph::buffer::list::const_iterator &bl); + class Monitor; /*-------------------*/ class NVMeofGwMap @@ -44,6 +52,9 @@ public: // epoch is for Paxos synchronization mechanizm epoch_t epoch = 0; bool delay_propose = false; + uint64_t published_features = 0; + static constexpr uint64_t FLAG_BEACONDIFF = (1<<0); + uint64_t ever_enabled_features = 0; std::map created_gws; @@ -93,6 +104,7 @@ public: std::string &location, bool &propose_pending); int cfg_location_disaster_clear(const NvmeGroupKey& group_key, std::string &location, bool &propose_pending); + int cfg_enable_disable_beacon_diff(bool enable, bool &propose_pending); void process_gw_map_ka( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, epoch_t& last_osd_epoch, bool &propose_pending); @@ -192,24 +204,18 @@ public: void encode(ceph::buffer::list &bl, uint64_t features) const { using ceph::encode; - uint8_t version = 1; - if (HAVE_FEATURE(features, NVMEOFHAMAP)) { - version = 2; - } - if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { - version = 3; - } - ENCODE_START(version, version, bl); + static const uint8_t version = 3; + static const uint8_t cversion = 1; + ENCODE_START(version, cversion, bl); encode(epoch, bl);// global map epoch encode(created_gws, bl, features); //Encode created GWs encode(fsm_timers, bl, features); - if (version >= 2) { - encode(gw_epoch, bl); - } - if (version >=3) { - encode(disaster_locations, bl); - } + encode(gw_epoch, bl); + encode_gws_beacon_diff_additions(created_gws, bl, features); + encode(disaster_locations, bl); + encode(ever_enabled_features, bl); + encode(published_features, bl); ENCODE_FINISH(bl); } @@ -228,8 +234,11 @@ public: dout(20) << "decode gw epoch " << dendl; } if (struct_v >=3) { + decode_gws_beacon_diff_additions(created_gws, bl); decode(disaster_locations, bl); - dout(20) << "decode disaster location " << dendl; + decode(ever_enabled_features, bl); + decode(published_features, bl); + dout(20) << " decoded features " << published_features << dendl; } DECODE_FINISH(bl); } diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 6a4dd2326c91..80d5d852a5e8 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -157,7 +157,19 @@ void NVMeofGwMon::tick() pending_map.track_deleting_gws(group_key, *subsystems, propose); _propose_pending |= propose; } - // Periodic: take care of not handled ANA groups + + if (mon.get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) { + /* only automatically upgrade once: */ + if ((pending_map.ever_enabled_features & NVMeofGwMap::FLAG_BEACONDIFF) == 0) { + pending_map.ever_enabled_features |= NVMeofGwMap::FLAG_BEACONDIFF; + pending_map.published_features |= NVMeofGwMap::FLAG_BEACONDIFF; + dout(4) << " Updating map to enable beacon-diff, features " + << pending_map.published_features << dendl; + _propose_pending = true; + } + } + + // Periodically: take care of not handled ANA groups pending_map.handle_abandoned_ana_groups(propose); _propose_pending |= propose; @@ -252,7 +264,8 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t) pending_map.encode(bl, features); dout(10) << " has NVMEOFHA: " << HAVE_FEATURE(features, NVMEOFHA) << " has NVMEOFHAMAP: " << HAVE_FEATURE(features, NVMEOFHAMAP) - << " has BEACON_DIFF: " << HAVE_FEATURE(features, NVMEOF_BEACON_DIFF) << dendl; + << " has BEACON_DIFF: " + << mon.get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF) << dendl; put_version(t, pending_map.epoch, bl); put_last_committed(t, pending_map.epoch); @@ -265,8 +278,8 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t) void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) { version_t version = get_last_committed(); - uint64_t features = mon.get_quorum_con_features(); - dout(20) << " has BEACON_DIFF: " << HAVE_FEATURE(features, NVMEOF_BEACON_DIFF) + dout(20) << " has BEACON_DIFF: " + << mon.get_quorum_mon_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF) << dendl; if (version != map.epoch) { dout(10) << " NVMeGW loading version " << version @@ -344,6 +357,7 @@ void NVMeofGwMon::check_sub(Subscription *sub) = map.created_gws[group_key][gw_id]; // respond with a map slice correspondent to the same GW unicast_map.epoch = map.gw_epoch[group_key];//map.epoch; + unicast_map.published_features = map.published_features; sub->session->con->send_message2(make_message(unicast_map)); if (sub->onetime) { mon.session_map.remove_sub(sub); @@ -459,6 +473,8 @@ bool NVMeofGwMon::preprocess_command(MonOpRequestRef op) f->dump_unsigned("epoch", map.epoch); f->dump_string("pool", pool); f->dump_string("group", group); + f->dump_bool("beacon_diff_enabled", + map.published_features & NVMeofGwMap::FLAG_BEACONDIFF); if (HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA)) { f->dump_string("features", "LB"); if (map.created_gws[group_key].size()) { @@ -748,14 +764,15 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) <<" location "<< location << dendl; rc = pending_map.cfg_location_disaster_set(group_key, location, propose); - if (rc == -EINVAL || rc == -EEXIST) { + if (rc == -EINVAL || rc == -EEXIST || rc == -EOPNOTSUPP) { err = rc; sstrm.str(""); if (rc == -EEXIST) { sstrm.str("command already set please wait until completed"); - } - if (rc == -EINVAL) { + } else if (rc == -EINVAL) { sstrm.str("command cannot be executed"); + } else if (rc == -EOPNOTSUPP) { + sstrm.str("command not supported"); } } if (rc == 0 && propose == true) { @@ -772,19 +789,49 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) <<" location "<< location << dendl; rc = pending_map.cfg_location_disaster_clear(group_key, location, propose); - if (rc == -EINVAL || rc == -EEXIST) { + if (rc == -EINVAL || rc == -EEXIST || rc == -EOPNOTSUPP) { err = rc; sstrm.str(""); if (rc == -EEXIST) { sstrm.str("command already set please wait until completed"); - } - if (rc == -EINVAL) { + } else if (rc == -EINVAL) { sstrm.str("command cannot be executed"); + } else if (rc == -EOPNOTSUPP) { + sstrm.str("command not supported"); } } if (rc == 0 && propose == true) { response = true; } + } else if (prefix == "nvme-gw set") { + std::string choice, value; + cmd_getval(cmdmap, "var", choice); + cmd_getval(cmdmap, "val", value); + if (choice == "beacon-diff") { + bool propose = false; + dout(10) << "Command "<< prefix << " " << choice + << " " << value << dendl; + if (value != "enable" && value != "disable") { + rc = -EINVAL; + sstrm.str("command not permitted - illegal value"); + } else { + bool command = (value == "enable") ? true: false; + rc = pending_map.cfg_enable_disable_beacon_diff(command, propose); + if (rc == -EOPNOTSUPP) { + err = rc; + sstrm.str(""); + if (rc == -EOPNOTSUPP) { + sstrm.str("command not supported"); + } + } + if (rc == 0 && propose == true) { + response = true; + } + } + } else { + rc = -EPERM; + sstrm.str("command not permitted - illegal choice"); + } } getline(sstrm, rs); if (response == false) { @@ -860,6 +907,7 @@ void NVMeofGwMon::do_send_map_ack(MonOpRequestRef op, pending_gw_map : map.created_gws[group_key][gw_id]; ack_map.created_gws[group_key][gw_id].beacon_sequence = pending_gw_map.beacon_sequence; + ack_map.published_features = pending_map.published_features; if (!is_correct_sequence) { dout(4) << " GW " << gw_id << " sending ACK due to receiving beacon_sequence out of order" << dendl; @@ -876,7 +924,8 @@ void NVMeofGwMon::do_send_map_ack(MonOpRequestRef op, if (!gw_created) dout(10) << "gw not created, ack map " << ack_map << " epoch " << ack_map.epoch << dendl; - dout(20) << "ack_map " << ack_map <(ack_map); mon.send_reply(op, msg.detach()); } @@ -895,7 +944,7 @@ void NVMeofGwMon::do_send_map_ack(MonOpRequestRef op, * if descriptor is DELETED do the following * look for subs nqn in the gw.subs list and if found - delete */ -int NVMeofGwMon::apply_beacon(const NvmeGwId &gw_id, int gw_version, +int NVMeofGwMon::apply_beacon(const NvmeGwId &gw_id, int affected_version, const NvmeGroupKey& group_key, void *msg, const BeaconSubsystems& sub, gw_availability_t &avail, bool &propose_pending) @@ -906,8 +955,7 @@ int NVMeofGwMon::apply_beacon(const NvmeGwId &gw_id, int gw_version, pending_map.created_gws[group_key][gw_id].subsystems; auto &state = pending_map.created_gws[group_key][gw_id]; - if (!HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOF_BEACON_DIFF) && - gw_version == 0) { + if (affected_version == BEACON_VERSION_LEGACY) { if (gw_subs != sub) { dout(10) << "BEACON_DIFF logic not applied." "subsystems of GW changed, propose pending " << gw_id << dendl; @@ -917,6 +965,12 @@ int NVMeofGwMon::apply_beacon(const NvmeGwId &gw_id, int gw_version, changed = true; } } else { + if (!state.nonce_map.empty()) { + dout(4) << "Erase nonce map when Beacon-diff feature enabled for GW " + << gw_id << dendl; + state.nonce_map.clear();// no need anymore,used just in compatibility mode + propose_pending = true; + } if (state.beacon_sequence_ooo) { dout(10) << "Good sequence after out of order detection " "sequence "<< state.beacon_sequence << " " << gw_id << dendl; @@ -996,8 +1050,10 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) auto m = op->get_req(); uint64_t sequence = m->get_sequence(); int version = m->version; + uint16_t header_ver = m->get_header().version; dout(10) << "availability " << m->get_availability() - << " sequence " << sequence << " version " << version + << " sequence " << sequence << " version " << version + << " header version " << header_ver << " GW : " << m->get_gw_id() << " osdmap_epoch " << m->get_last_osd_epoch() << " subsystems " << m->get_subsystems().size() << dendl; @@ -1042,7 +1098,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) goto set_propose; } else { pending_map.created_gws[group_key][gw_id].subsystems.clear(); - pending_map.set_gw_beacon_sequence_number(gw_id, version, + pending_map.set_gw_beacon_sequence_number(gw_id, header_ver, group_key, sequence); dout(4) << "GW beacon: Created state - full startup done " << gw_id << " GW state in monitor data-base : " @@ -1073,7 +1129,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) // if GW reports Avail/Unavail but in monitor's database it is Unavailable if (gw_exists) { correct_sequence = pending_map.put_gw_beacon_sequence_number - (gw_id, version, group_key, sequence, stored_sequence); + (gw_id, header_ver, group_key, sequence, stored_sequence); // it means it did not perform "exit" after failover was set by // NVMeofGWMon if ((pending_map.created_gws[group_key][gw_id].availability == @@ -1128,7 +1184,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr()); gw_propose = true; } - if (apply_beacon(gw_id, version, group_key, (void *)m, sub, avail, propose) !=0) { + if (apply_beacon(gw_id, header_ver, group_key, (void *)m, sub, avail, propose) !=0) { nonce_propose = true; dout(10) << "subsystem(subs/listener/nonce/NM) of GW changed, propose pending " << gw_id << " available " << avail << dendl; diff --git a/src/mon/NVMeofGwSerialize.h b/src/mon/NVMeofGwSerialize.h index 24dee67d70bb..d1cd01cc2605 100755 --- a/src/mon/NVMeofGwSerialize.h +++ b/src/mon/NVMeofGwSerialize.h @@ -144,6 +144,7 @@ inline std::ostream& operator<<( << " availablilty " << value.availability << " sequence " << value.last_beacon_seq_number << " sequence-ooo " << value.last_beacon_seq_ooo + << " map_features " << value.map_features << " GwSubsystems: [ "; for (const auto& sub: value.subsystems) { os << sub.second << " "; @@ -257,7 +258,7 @@ inline std::ostream& operator<<(std::ostream& os, const LocationStates value) { inline std::ostream& operator<<(std::ostream& os, const NVMeofGwMap value) { os << "\n" << MODULE_PREFFIX << "== NVMeofGwMap [ Created_gws: epoch " - << value.epoch; + << value.epoch << " features " << value.published_features; for (auto& group_gws: value.gw_epoch) { os << "\n" << MODULE_PREFFIX << "{ " << group_gws.first << " } -> GW epoch: " << group_gws.second << " }"; @@ -273,6 +274,10 @@ inline std::ostream& operator<<(std::ostream& os, const NVMeofGwMap value) { return os; } +inline void encode_beacon_change_descriptors(const BeaconSubsystems& sub, ceph::bufferlist &bl); +inline void decode_beacon_change_descriptors(BeaconSubsystems& subs, ceph::buffer::list::const_iterator &bl); + + inline void encode(const ana_state_t& st, ceph::bufferlist &bl) { ENCODE_START(1, 1, bl); encode((uint32_t)st.size(), bl); @@ -343,19 +348,16 @@ inline void decode( } inline void encode(const NvmeGwClientState& state, ceph::bufferlist &bl, uint64_t features) { - uint8_t version = 1; - if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { - version = 2; - } - ENCODE_START(version, version, bl); + static const uint8_t version = 2; + static const uint8_t cversion = 1; + ENCODE_START(version, cversion, bl); encode(state.group_id, bl); encode(state.gw_map_epoch, bl); encode (state.subsystems, bl, features); encode((uint32_t)state.availability, bl); - if (version >= 2) { - encode((uint64_t)state.last_beacon_seq_number, bl); - encode(state.last_beacon_seq_ooo, bl); - } + encode((uint64_t)state.last_beacon_seq_number, bl); + encode((uint8_t)state.last_beacon_seq_ooo, bl); + encode((uint64_t)state.map_features, bl); ENCODE_FINISH(bl); } @@ -372,7 +374,10 @@ inline void decode( if (struct_v >= 2) { decode(last_beacon_seq_number, bl); state.last_beacon_seq_number = last_beacon_seq_number; - decode(state.last_beacon_seq_ooo, bl); + uint8_t last_beacon_seq_ooo; + decode(last_beacon_seq_ooo, bl); + state.last_beacon_seq_ooo = (bool)last_beacon_seq_ooo; + decode(state.map_features, bl); } DECODE_FINISH(bl); } @@ -507,9 +512,6 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, if (HAVE_FEATURE(features, NVMEOFHAMAP)) { version = 3; } - if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { - version = 4; - } ENCODE_START(version, version, bl); dout(20) << "encode NvmeGwMonStates. struct_v: " << (int)version << dendl; encode ((uint32_t)gws.size(), bl); // number of gws in the group @@ -528,7 +530,7 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, encode((uint16_t)gw.second.last_gw_map_epoch_valid, bl); dout(20) << "encode availability " << gw.second.availability << " startup " << (int)gw.second.performed_full_startup << dendl; - encode(gw.second.subsystems, bl, features); + encode(gw.second.subsystems, bl); encode((uint32_t)gw.second.blocklist_data.size(), bl); for (auto &blklst_itr: gw.second.blocklist_data) { @@ -545,7 +547,7 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, encode((uint32_t)gw.second.availability, bl); encode((uint16_t)gw.second.performed_full_startup, bl); encode((uint16_t)gw.second.last_gw_map_epoch_valid, bl); - encode(gw.second.subsystems, bl, features); + encode(gw.second.subsystems, bl); Blocklist_data bl_data[MAX_SUPPORTED_ANA_GROUPS]; for (auto &blklst_itr: gw.second.blocklist_data) { bl_data[blklst_itr.first].osd_epoch = blklst_itr.second.osd_epoch; @@ -562,11 +564,6 @@ inline void encode(const NvmeGwMonStates& gws, ceph::bufferlist &bl, gw.second.addr_vect.encode(bl, features); encode(gw.second.beacon_index, bl); } - if (version >= 4) { - encode((int)gw.second.gw_admin_state, bl); - dout(10) << "encode location " << gw.second.location << dendl; - encode(gw.second.location, bl); - } } ENCODE_FINISH(bl); } @@ -654,15 +651,6 @@ inline void decode( decode(gw_created.beacon_index, bl); dout(20) << "decoded beacon_index " << gw_created.beacon_index << dendl; } - if (struct_v >= 4) { - dout(20) << "decode admin state and location" << dendl; - int admin_state; - decode(admin_state, bl); - gw_created.gw_admin_state = (gw_admin_state_t)admin_state; - decode(gw_created.location, bl); - dout(20) << "decoded location " << gw_created.location << dendl; - } - gws[gw_name] = gw_created; } if (struct_v == 1) { //Fix allocations of states and blocklist_data @@ -735,6 +723,41 @@ inline void encode( ENCODE_FINISH(bl); } +inline void encode_gws_beacon_diff_additions( + const std::map& created_gws, + ceph::bufferlist &bl, uint64_t features) { + ENCODE_START(1, 1, bl); + for (auto& group_gws: created_gws) { + auto& gws = group_gws.second; + for (auto& gw : gws) { + encode((uint8_t)gw.second.gw_admin_state, bl); + dout(10) << "encode location " << gw.second.location << " admin state " << (int)gw.second.gw_admin_state << dendl; + encode(gw.second.location, bl); + encode_beacon_change_descriptors(gw.second.subsystems, bl); + } + } + ENCODE_FINISH(bl); +} + +inline void decode_gws_beacon_diff_additions( + std::map& created_gws, + ceph::buffer::list::const_iterator &bl) { + DECODE_START(1, bl); + for (auto& group_gws: created_gws) { + auto& gws = group_gws.second; + for (auto& gw : gws) { + dout(20) << "decode admin state and location" << dendl; + uint8_t admin_state; + decode(admin_state, bl); + gw.second.gw_admin_state = (gw_admin_state_t)admin_state; + decode(gw.second.location, bl); + dout(20) << "decoded location " << gw.second.location << " admin state " << (int)gw.second.gw_admin_state << dendl; + decode_beacon_change_descriptors(gw.second.subsystems, bl); + } + } + DECODE_FINISH(bl); +} + inline void decode( std::map& created_gws, ceph::buffer::list::const_iterator &bl) { @@ -904,15 +927,12 @@ inline void decode(BeaconListener& ls, ceph::buffer::list::const_iterator &bl) { DECODE_FINISH(bl); } -inline void encode(const BeaconSubsystem& sub, ceph::bufferlist &bl, uint64_t features) { +inline void encode(const BeaconSubsystem& sub, ceph::bufferlist &bl) { uint8_t version = 1; - if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) { - version = 2; - } ENCODE_START(version, version, bl); encode(sub.nqn, bl); - dout(20) << "encode BeaconSubsystems " << sub.nqn <<" features " << features + dout(20) << "encode BeaconSubsystems " << sub.nqn << " version " << (int)version << dendl; encode((uint32_t)sub.listeners.size(), bl); for (const auto& ls: sub.listeners) @@ -920,15 +940,33 @@ inline void encode(const BeaconSubsystem& sub, ceph::bufferlist &bl, uint64_t f encode((uint32_t)sub.namespaces.size(), bl); for (const auto& ns: sub.namespaces) encode(ns, bl); - if (version >= 2) { - encode((uint32_t)sub.change_descriptor, bl); - dout(20) << "encode BeaconSubsystems change-descr: " << (uint32_t)sub.change_descriptor << dendl; + ENCODE_FINISH(bl); +} + + + +inline void encode_beacon_change_descriptors(const BeaconSubsystems& subs, ceph::bufferlist &bl) { + ENCODE_START(1, 1, bl); + for (auto &sub_it:subs) { + encode((uint32_t)sub_it.change_descriptor, bl); + dout(20) << "encode change_descriptor " << (uint32_t)sub_it.change_descriptor << dendl; } ENCODE_FINISH(bl); } +inline void decode_beacon_change_descriptors(BeaconSubsystems& subs, ceph::buffer::list::const_iterator &bl) { + DECODE_START(1, bl); + for (auto &sub_it:subs) { + uint32_t change_desc; + decode(change_desc, bl); + sub_it.change_descriptor = static_cast(change_desc); + dout(20) << "decode change_descriptor " << change_desc << dendl; + } + DECODE_FINISH(bl); +} + inline void decode(BeaconSubsystem& sub, ceph::buffer::list::const_iterator &bl) { - DECODE_START(2, bl); + DECODE_START(1, bl); decode(sub.nqn, bl); dout(20) << "decode BeaconSubsystems " << sub.nqn << dendl; uint32_t s; @@ -948,12 +986,6 @@ inline void decode(BeaconSubsystem& sub, ceph::buffer::list::const_iterator &bl) decode(ns, bl); sub.namespaces.push_back(ns); } - if (struct_v >= 2) { - uint32_t change_desc; - decode(change_desc, bl); - sub.change_descriptor = static_cast(change_desc); - dout(20) << "decode BeaconSubsystems version >= " << 2 << dendl; - } DECODE_FINISH(bl); } diff --git a/src/mon/NVMeofGwTypes.h b/src/mon/NVMeofGwTypes.h index 5c01e5b6bb19..5529cd419df7 100755 --- a/src/mon/NVMeofGwTypes.h +++ b/src/mon/NVMeofGwTypes.h @@ -259,14 +259,16 @@ struct NvmeGwClientState { gw_availability_t availability; uint64_t last_beacon_seq_number; bool last_beacon_seq_ooo; //out of order sequence + uint64_t map_features; // last map features NvmeGwClientState(NvmeAnaGrpId id, epoch_t epoch, gw_availability_t available, - uint64_t sequence, bool sequence_ooo) + uint64_t sequence, bool sequence_ooo, uint64_t last_published_features) : group_id(id), gw_map_epoch(epoch), availability(available), - last_beacon_seq_number(sequence), last_beacon_seq_ooo(sequence_ooo) {} + last_beacon_seq_number(sequence), last_beacon_seq_ooo(sequence_ooo), + map_features(last_published_features) {} NvmeGwClientState() : NvmeGwClientState( - REDUNDANT_GW_ANA_GROUP_ID, 0, gw_availability_t::GW_UNAVAILABLE, 0, 0) {} + REDUNDANT_GW_ANA_GROUP_ID, 0, gw_availability_t::GW_UNAVAILABLE, 0, 0, 0) {} }; struct Tmdata { diff --git a/src/nvmeof/NVMeofGwMonitorClient.cc b/src/nvmeof/NVMeofGwMonitorClient.cc index 6d80d2d19363..6eb32d3d62ef 100644 --- a/src/nvmeof/NVMeofGwMonitorClient.cc +++ b/src/nvmeof/NVMeofGwMonitorClient.cc @@ -269,7 +269,7 @@ void NVMeofGwMonitorClient::send_beacon() gwmap_epoch, beacon_sequence, // Pass affected features to the constructor - cluster_beacon_diff_included ? CEPH_FEATUREMASK_NVMEOF_BEACON_DIFF : 0 + cluster_beacon_diff_included ? 1 : 0 ); dout(10) << "sending beacon with diff support: " << (cluster_beacon_diff_included ? "enabled" : "disabled") << dendl; @@ -413,6 +413,13 @@ void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t nmap) // ensure that the gateway state has not vanished ceph_assert(got_new_gw_state || !got_old_gw_state); + uint64_t old_cluster_beacon_diff_included = cluster_beacon_diff_included; + cluster_beacon_diff_included = (new_gw_state.map_features & NVMeofGwMap::FLAG_BEACONDIFF) != 0; + if (old_cluster_beacon_diff_included != cluster_beacon_diff_included) { + dout(0) << fmt::format("Updated cluster features: 0x{:x}", cluster_beacon_diff_included) + << dendl; + } + // Check if the last_beacon_seq_number in the received map doesn't match our last sent beacon_sequence // beacon_sequence is incremented after sending, so we compare with (beacon_sequence - 1) { @@ -552,12 +559,6 @@ Dispatcher::dispatch_result_t NVMeofGwMonitorClient::ms_dispatch2(const ref_tget_type() << dendl; - // print connection features for all incoming messages and update cluster features - if (m->get_connection()) { - cluster_beacon_diff_included = monc.get_monmap_required_features().contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF); - dout(10) << fmt::format("Updated cluster features: 0x{:x}", cluster_beacon_diff_included) << dendl; - } - if (m->get_type() == MSG_MNVMEOF_GW_MAP) { handle_nvmeof_gw_map(ref_cast(m)); return Dispatcher::HANDLED(); diff --git a/src/test/test_nvmeof_mon_encoding.cc b/src/test/test_nvmeof_mon_encoding.cc index ca7f01d3eb16..3a853d374503 100644 --- a/src/test/test_nvmeof_mon_encoding.cc +++ b/src/test/test_nvmeof_mon_encoding.cc @@ -75,7 +75,7 @@ void test_MNVMeofGwMap() { std::string pool = "pool1"; std::string group = "grp1"; std::string gw_id = "GW1"; - NvmeGwClientState state(1, 32, gw_availability_t::GW_UNAVAILABLE, 0, false); + NvmeGwClientState state(1, 32, gw_availability_t::GW_UNAVAILABLE, 0, false, 0); std::string nqn = "nqn"; ana_state_t ana_state; NqnState nqn_state(nqn, ana_state); @@ -228,7 +228,7 @@ void test_subsystem_change_descriptors() { BeaconSubsystems subs = { sub1, sub2, sub3 }; // Encode and decode ceph::buffer::list bl; - encode(subs, bl, CEPH_FEATURES_ALL); + encode(subs, bl); auto p = bl.cbegin(); BeaconSubsystems decoded_subs; decode(decoded_subs, p);