From: Leonid Chernin Date: Thu, 25 Apr 2024 12:01:17 +0000 (+0000) Subject: ceph-nvmeof-mon fixes X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=941fe9be8a3ebdcd47d821d5160b654bb0be4de6;p=ceph-ci.git ceph-nvmeof-mon fixes Resolves: rhbz#2277947 - add validation of the map after each map decision Signed-off-by: Leonid Chernin (cherry picked from commit c2f883d032c4f4cabbaccd69bce53aff9f368efa) - added availability per gw to the exported map Signed-off-by: Leonid Chernin (cherry picked from commit dbda4f7a5220c930ae1e10ff6a92287171d5e9f4) - src/nvmeof/NVMeofGwMonitorClient.cc: panic if the monitor flags gateway as unavailable Signed-off-by: Alexander Indenbaum (cherry picked from commit e5f9eeabde506501989170e26433bd3e698a429e) - ceph-nvmeof-mon: disconnect panic nvmeof_mon_client_disc * extract nvmeof_mon_client_disc conf option * default value 100 secs Signed-off-by: Alexander Indenbaum (cherry picked from commit 7b469988d81ca93ce0ae0694c50172bc70be20ea) (cherry picked from commit 041933408e62a4f3be449025a7e0438efb531a2e) Signed-off-by: Alexander Indenbaum --- diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in index 7ff915903eb..58cee78207f 100644 --- a/src/common/options/mon.yaml.in +++ b/src/common/options/mon.yaml.in @@ -1345,6 +1345,14 @@ options: with_legacy: true see_also: - osd_heartbeat_use_min_delay_socket +- name: nvmeof_mon_client_disconnect_panic + type: secs + level: advanced + desc: The duration, expressed in seconds, after which the nvmeof gateway + should trigger a panic if it loses connection to the monitor + default: 100 + services: + - mon - name: nvmeof_mon_client_tick_period type: secs level: advanced diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index c6c4cb6a323..03bc75dc68f 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -36,7 +36,7 @@ void NVMeofGwMap::to_gmap(std::map& Gmap) const { const auto& gw_id = gw_created_pair.first; const auto& 
gw_created = gw_created_pair.second; - auto gw_state = NvmeGwState(gw_created.ana_grp_id, epoch); + auto gw_state = NvmeGwState(gw_created.ana_grp_id, epoch, gw_created.availability); for (const auto& sub: gw_created.subsystems) { gw_state.subsystems.insert({sub.nqn, NqnState(sub.nqn, gw_created.sm_state, gw_created )}); } @@ -109,6 +109,7 @@ int NVMeofGwMap::process_gw_map_gw_down(const NvmeGwId &gw_id, const NvmeGroupKe st.standby_state(i); } propose_pending = true; // map should reflect that gw becames unavailable + if (propose_pending) validate_gw_map(group_key); } else { dout(1) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; @@ -152,6 +153,7 @@ void NVMeofGwMap::process_gw_map_ka(const NvmeGwId &gw_id, const NvmeGroupKey& g fsm_handle_gw_alive (gw_id, group_key, gw_state->second, st.sm_state[i], i, last_osd_epoch, propose_pending); } } + if (propose_pending) validate_gw_map(group_key); } @@ -192,6 +194,9 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose) find_failback_gw(gw_id, group_key, propose); } } + if (propose) { + validate_gw_map(group_key); + } } } @@ -453,6 +458,7 @@ void NVMeofGwMap::fsm_handle_gw_delete (const NvmeGwId &gw_id, const NvmeGroupKe ceph_assert(false); } } + if (map_modified) validate_gw_map(group_key); } void NVMeofGwMap::fsm_handle_to_expired(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, bool &map_modified) @@ -494,6 +500,7 @@ void NVMeofGwMap::fsm_handle_to_expired(const NvmeGwId &gw_id, const NvmeGroupKe dout(1) << " Expired GW_WAIT_FAILOVER_PREPARED timer from GW " << gw_id << " ANA groupId: "<< grpid << dendl; ceph_assert(false); } + if (map_modified) validate_gw_map(group_key); } NvmeGwCreated& NVMeofGwMap::find_already_created_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key) @@ -555,6 +562,34 @@ int NVMeofGwMap::blocklist_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_k return 0; } +void NVMeofGwMap::validate_gw_map(const NvmeGroupKey& group_key) 
+{ + NvmeAnaGrpId anas[MAX_SUPPORTED_ANA_GROUPS]; + int i = 0; + int max_groups = 0; + for (auto& gw_created_pair: Created_gws[group_key]) { + auto& st = gw_created_pair.second; + anas[i++] = st.ana_grp_id; + } + max_groups = i; + for(int i = 0; i < max_groups; i++) + { + int ana_group = anas[i]; + int count = 0; + for (auto& gw_created_pair: Created_gws[group_key]) { + auto& st = gw_created_pair.second; + if (st.sm_state[ana_group] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE){ + count ++; + if(count == 2) { + dout(1) << "number active states per ana-group " << ana_group << "more than 1 in pool-group " << group_key << dendl; + dout(1) << Created_gws[group_key] << dendl; + ceph_assert(false); + } + } + } + } +} + void NVMeofGwMap::update_active_timers( bool &propose_pending ){ //dout(4) << __func__ << " called, p_monitor: " << mon << dendl; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index bf46b31dc07..51c80111ed4 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -64,6 +64,7 @@ private: int get_timer (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid); void cancel_timer(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid); + void validate_gw_map(const NvmeGroupKey& group_key); public: void encode(ceph::buffer::list &bl) const { diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 893a1852255..69766f9d054 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -343,6 +343,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) if( map.Created_gws[group_key].size()){ f->open_object_section("common"); + f->dump_unsigned("epoch", map.epoch); f->dump_string("pool", pool); f->dump_string("group", group); f->dump_unsigned("num gws", map.Created_gws[group_key].size()); diff --git a/src/mon/NVMeofGwSerialize.h b/src/mon/NVMeofGwSerialize.h index 905d9090d1e..151e2e513a5 100755 --- a/src/mon/NVMeofGwSerialize.h +++ b/src/mon/NVMeofGwSerialize.h @@ -86,7 +86,7 @@ inline 
std::ostream& operator<<(std::ostream& os, const NqnState value) { } inline std::ostream& operator<<(std::ostream& os, const NvmeGwState value) { - os << "NvmeGwState { group id: " << value.group_id << " gw_map_epoch " << value.gw_map_epoch + os << "NvmeGwState { group id: " << value.group_id << " gw_map_epoch " << value.gw_map_epoch << " availablilty "<< value.availability << " GwSubsystems: [ "; for (const auto& sub: value.subsystems) os << sub.second << " "; os << " ] }"; @@ -240,6 +240,7 @@ inline void encode(const NvmeGwState& state, ceph::bufferlist &bl) { encode(state.group_id, bl); encode(state.gw_map_epoch, bl); encode (state.subsystems, bl); + encode((uint32_t)state.availability, bl); ENCODE_FINISH(bl); } @@ -248,6 +249,9 @@ inline void decode(NvmeGwState& state, ceph::bufferlist::const_iterator& bl) { decode(state.group_id, bl); decode(state.gw_map_epoch, bl); decode(state.subsystems, bl); + uint32_t avail; + decode(avail, bl); + state.availability = (GW_AVAILABILITY_E)avail; DECODE_FINISH(bl); } diff --git a/src/mon/NVMeofGwTypes.h b/src/mon/NVMeofGwTypes.h index d66f478cf98..e78e8170815 100755 --- a/src/mon/NVMeofGwTypes.h +++ b/src/mon/NVMeofGwTypes.h @@ -153,13 +153,14 @@ struct NvmeGwState { NvmeAnaGrpId group_id; epoch_t gw_map_epoch; GwSubsystems subsystems; - - NvmeGwState(NvmeAnaGrpId id, epoch_t epoch): + GW_AVAILABILITY_E availability; + NvmeGwState(NvmeAnaGrpId id, epoch_t epoch, GW_AVAILABILITY_E available): group_id(id), - gw_map_epoch(epoch) + gw_map_epoch(epoch), + availability(available) {}; - NvmeGwState() : NvmeGwState(REDUNDANT_GW_ANA_GROUP_ID, 0) {}; + NvmeGwState() : NvmeGwState(REDUNDANT_GW_ANA_GROUP_ID, 0, GW_AVAILABILITY_E::GW_UNAVAILABLE) {}; }; struct NvmeGwMetaData { diff --git a/src/nvmeof/NVMeofGwMonitorClient.cc b/src/nvmeof/NVMeofGwMonitorClient.cc index ed510a9ef31..61a71ffb205 100644 --- a/src/nvmeof/NVMeofGwMonitorClient.cc +++ b/src/nvmeof/NVMeofGwMonitorClient.cc @@ -235,12 +235,12 @@ void 
NVMeofGwMonitorClient::send_beacon() void NVMeofGwMonitorClient::disconnect_panic() { - auto disconnect_panic_duration = g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_beacon_grace").count(); + auto disconnect_panic_duration = g_conf().get_val<std::chrono::seconds>("nvmeof_mon_client_disconnect_panic").count(); auto now = std::chrono::steady_clock::now(); auto elapsed_seconds = std::chrono::duration_cast<std::chrono::seconds>(now - last_map_time).count(); if (elapsed_seconds > disconnect_panic_duration) { dout(4) << "Triggering a panic upon disconnection from the monitor, elapsed " << elapsed_seconds << ", configured disconnect panic duration " << disconnect_panic_duration << dendl; - throw std::runtime_error("Lost connection to the monitor (mon)."); + throw std::runtime_error("Lost connection to the monitor (beacon timeout)."); } } @@ -314,10 +314,19 @@ void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t<MNVMeofGwMap> nmap) } } - // Make sure we do not get out of order state changes from the monitor if (got_old_gw_state && got_new_gw_state) { dout(0) << "got_old_gw_state: " << old_gw_state << "got_new_gw_state: " << new_gw_state << dendl; + // Make sure we do not get out of order state changes from the monitor ceph_assert(new_gw_state.gw_map_epoch >= old_gw_state.gw_map_epoch); + + // If the monitor previously identified this gateway as accessible but now + // flags it as unavailable, it suggests that the gateway lost connection + // to the monitor. 
+ if (old_gw_state.availability == GW_AVAILABILITY_E::GW_AVAILABLE && + new_gw_state.availability == GW_AVAILABILITY_E::GW_UNAVAILABLE) { + dout(4) << "Triggering a panic upon disconnection from the monitor, gw state - unavailable" << dendl; + throw std::runtime_error("Lost connection to the monitor (gw map unavailable)."); + } } // Gather all state changes diff --git a/src/test/test_nvmeof_mon_encoding.cc b/src/test/test_nvmeof_mon_encoding.cc index 9d84b58397b..89c72df3849 100644 --- a/src/test/test_nvmeof_mon_encoding.cc +++ b/src/test/test_nvmeof_mon_encoding.cc @@ -63,7 +63,7 @@ void test_MNVMeofGwMap() { std::string pool = "pool1"; std::string group = "grp1"; std::string gw_id = "GW1"; - NvmeGwState state(1, 32); + NvmeGwState state(1, 32, GW_AVAILABILITY_E::GW_UNAVAILABLE); std::string nqn = "nqn"; ANA_STATE ana_state; NqnState nqn_state(nqn, ana_state);