]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
ceph-nvmeof-mon fixes
authorLeonid Chernin <leonidc@il.ibm.com>
Thu, 25 Apr 2024 12:01:17 +0000 (12:01 +0000)
committerAlexander Indenbaum <aindenba@redhat.com>
Thu, 20 Nov 2025 08:55:27 +0000 (10:55 +0200)
Resolves: rhbz#2277947

- add validation of the map after each map decision

Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
  (cherry picked from commit c2f883d032c4f4cabbaccd69bce53aff9f368efa)

- added availability per gw to the exported map

Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
  (cherry picked from commit dbda4f7a5220c930ae1e10ff6a92287171d5e9f4)

- src/nvmeof/NVMeofGwMonitorClient.cc: panic if the monitor flags gateway as unavailable

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
  (cherry picked from commit e5f9eeabde506501989170e26433bd3e698a429e)

- ceph-nvmeof-mon: disconnect panic nvmeof_mon_client_disc

  * extract nvmeof_mon_client_disconnect_panic conf option
  * default value 100 secs

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
  (cherry picked from commit 7b469988d81ca93ce0ae0694c50172bc70be20ea)

(cherry picked from commit 041933408e62a4f3be449025a7e0438efb531a2e)
Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
src/common/options/mon.yaml.in
src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMap.h
src/mon/NVMeofGwMon.cc
src/mon/NVMeofGwSerialize.h
src/mon/NVMeofGwTypes.h
src/nvmeof/NVMeofGwMonitorClient.cc
src/test/test_nvmeof_mon_encoding.cc

index 7ff915903eb562e87cf83d1bdd5c959f47bb8414..58cee78207f406fc8488bd622df5c7540acb9a88 100644 (file)
@@ -1345,6 +1345,14 @@ options:
   with_legacy: true
   see_also:
   - osd_heartbeat_use_min_delay_socket
+- name: nvmeof_mon_client_disconnect_panic
+  type: secs
+  level: advanced
+  desc: The duration, expressed in seconds, after which the nvmeof gateway
+    should trigger a panic if it loses connection to the monitor
+  default: 100
+  services:
+  - mon
 - name: nvmeof_mon_client_tick_period
   type: secs
   level: advanced
index c6c4cb6a32320e621bc674511f392fdf19c3d8b7..03bc75dc68fc8ab719496f3366195c4828f5f43a 100755 (executable)
@@ -36,7 +36,7 @@ void NVMeofGwMap::to_gmap(std::map<NvmeGroupKey, NvmeGwMap>& Gmap) const {
             const auto& gw_id = gw_created_pair.first;
             const auto& gw_created  = gw_created_pair.second;
 
-            auto gw_state = NvmeGwState(gw_created.ana_grp_id, epoch);
+            auto gw_state = NvmeGwState(gw_created.ana_grp_id, epoch, gw_created.availability);
             for (const auto& sub: gw_created.subsystems) {
                 gw_state.subsystems.insert({sub.nqn, NqnState(sub.nqn, gw_created.sm_state, gw_created )});
             }
@@ -109,6 +109,7 @@ int NVMeofGwMap::process_gw_map_gw_down(const NvmeGwId &gw_id, const NvmeGroupKe
             st.standby_state(i);
         }
         propose_pending = true; // map should reflect that gw becames unavailable
+        if (propose_pending) validate_gw_map(group_key);
     }
     else {
         dout(1)  << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl;
@@ -152,6 +153,7 @@ void NVMeofGwMap::process_gw_map_ka(const NvmeGwId &gw_id, const NvmeGroupKey& g
           fsm_handle_gw_alive (gw_id, group_key, gw_state->second, st.sm_state[i], i, last_osd_epoch, propose_pending);
         }
     }
+    if (propose_pending) validate_gw_map(group_key);
 }
 
 
@@ -192,6 +194,9 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose)
                     find_failback_gw(gw_id, group_key, propose);
                 }
             }
+            if (propose) {
+                validate_gw_map(group_key);
+            }
     }
 }
 
@@ -453,6 +458,7 @@ void NVMeofGwMap::fsm_handle_gw_delete (const NvmeGwId &gw_id, const NvmeGroupKe
             ceph_assert(false);
         }
     }
+    if (map_modified) validate_gw_map(group_key);
 }
 
 void NVMeofGwMap::fsm_handle_to_expired(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid,  bool &map_modified)
@@ -494,6 +500,7 @@ void NVMeofGwMap::fsm_handle_to_expired(const NvmeGwId &gw_id, const NvmeGroupKe
         dout(1) << " Expired GW_WAIT_FAILOVER_PREPARED timer from GW " << gw_id << " ANA groupId: "<< grpid << dendl;
         ceph_assert(false);
     }
+    if (map_modified) validate_gw_map(group_key);
 }
 
 NvmeGwCreated& NVMeofGwMap::find_already_created_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key)
@@ -555,6 +562,34 @@ int NVMeofGwMap::blocklist_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_k
     return 0;
 }
 
+void  NVMeofGwMap::validate_gw_map(const NvmeGroupKey& group_key)
+{
+   NvmeAnaGrpId anas[MAX_SUPPORTED_ANA_GROUPS];
+   int i = 0;
+   int max_groups = 0;
+   for (auto& gw_created_pair: Created_gws[group_key]) {
+        auto& st = gw_created_pair.second;
+        anas[i++] = st.ana_grp_id;
+   }
+   max_groups = i;
+   for(int i = 0; i < max_groups; i++)
+   {
+       int ana_group = anas[i];
+       int count = 0;
+       for (auto& gw_created_pair: Created_gws[group_key]) {
+           auto& st = gw_created_pair.second;
+           if (st.sm_state[ana_group] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE){
+               count ++;
+               if(count == 2) {
+                   dout(1) << "number active states per ana-group " << ana_group << "more than 1 in pool-group " << group_key << dendl;
+                   dout(1) << Created_gws[group_key] << dendl;
+                   ceph_assert(false);
+               }
+           }
+       }
+   }
+}
+
 void NVMeofGwMap::update_active_timers( bool &propose_pending ){
 
     //dout(4) << __func__  <<  " called,  p_monitor: " << mon << dendl;
index bf46b31dc07e13177749771c92c8a2db9bc16f25..51c80111ed405e24da5da6f86b1ea7a1a3e1f51c 100755 (executable)
@@ -64,6 +64,7 @@ private:
 
     int  get_timer   (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
     void cancel_timer(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
+    void validate_gw_map(const NvmeGroupKey& group_key);
 
 public:
     void encode(ceph::buffer::list &bl) const {
index 893a185225503827f75e7bf5b0dce58c9b2e6729..69766f9d0548c77e0222516a5e8547bce0dfd0d9 100644 (file)
@@ -343,6 +343,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
 
         if( map.Created_gws[group_key].size()){
             f->open_object_section("common");
+            f->dump_unsigned("epoch", map.epoch);
             f->dump_string("pool", pool);
             f->dump_string("group", group);
             f->dump_unsigned("num gws", map.Created_gws[group_key].size());
index 905d9090d1e005f78fb992a233d755b9a7ce3c1b..151e2e513a589be8209e6ce0494345aeed1d46d1 100755 (executable)
@@ -86,7 +86,7 @@ inline std::ostream& operator<<(std::ostream& os, const NqnState value) {
 }
 
 inline std::ostream& operator<<(std::ostream& os, const NvmeGwState value) {
-    os <<  "NvmeGwState { group id: " << value.group_id <<  " gw_map_epoch " <<  value.gw_map_epoch
+    os <<  "NvmeGwState { group id: " << value.group_id <<  " gw_map_epoch " <<  value.gw_map_epoch << " availablilty "<< value.availability
         << " GwSubsystems: [ ";
     for (const auto& sub: value.subsystems) os << sub.second << " ";
     os << " ] }";
@@ -240,6 +240,7 @@ inline void encode(const NvmeGwState& state,  ceph::bufferlist &bl) {
     encode(state.group_id, bl);
     encode(state.gw_map_epoch, bl);
     encode (state.subsystems, bl);
+    encode((uint32_t)state.availability, bl);
     ENCODE_FINISH(bl);
 }
 
@@ -248,6 +249,9 @@ inline  void decode(NvmeGwState& state,  ceph::bufferlist::const_iterator& bl) {
     decode(state.group_id, bl);
     decode(state.gw_map_epoch, bl);
     decode(state.subsystems, bl);
+    uint32_t avail;
+    decode(avail, bl);
+    state.availability = (GW_AVAILABILITY_E)avail;
     DECODE_FINISH(bl);
 }
 
index d66f478cf98a4607a449509d3f00943df6447d77..e78e817081527b5ab5b1527b785ea291ffb317fe 100755 (executable)
@@ -153,13 +153,14 @@ struct NvmeGwState {
     NvmeAnaGrpId              group_id;
     epoch_t                   gw_map_epoch;
     GwSubsystems              subsystems;
-
-    NvmeGwState(NvmeAnaGrpId id, epoch_t epoch):
+    GW_AVAILABILITY_E         availability;
+    NvmeGwState(NvmeAnaGrpId id, epoch_t epoch, GW_AVAILABILITY_E available):
         group_id(id),
-        gw_map_epoch(epoch)
+        gw_map_epoch(epoch),
+        availability(available)
     {};
 
-    NvmeGwState() : NvmeGwState(REDUNDANT_GW_ANA_GROUP_ID, 0) {};
+    NvmeGwState() : NvmeGwState(REDUNDANT_GW_ANA_GROUP_ID, 0, GW_AVAILABILITY_E::GW_UNAVAILABLE) {};
 };
 
 struct NvmeGwMetaData {
index ed510a9ef3166e85829f82377e29be36e2a45ff6..61a71ffb205a446386e8437b799c2c9f53866937 100644 (file)
@@ -235,12 +235,12 @@ void NVMeofGwMonitorClient::send_beacon()
 
 void NVMeofGwMonitorClient::disconnect_panic()
 {
-  auto disconnect_panic_duration = g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_beacon_grace").count();
+  auto disconnect_panic_duration = g_conf().get_val<std::chrono::seconds>("nvmeof_mon_client_disconnect_panic").count();
   auto now = std::chrono::steady_clock::now();
   auto elapsed_seconds = std::chrono::duration_cast<std::chrono::seconds>(now - last_map_time).count();
   if (elapsed_seconds > disconnect_panic_duration) {
     dout(4) << "Triggering a panic upon disconnection from the monitor, elapsed " << elapsed_seconds << ", configured disconnect panic duration " << disconnect_panic_duration << dendl;
-    throw std::runtime_error("Lost connection to the monitor (mon).");
+    throw std::runtime_error("Lost connection to the monitor (beacon timeout).");
   }
 }
 
@@ -314,10 +314,19 @@ void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t<MNVMeofGwMap> nmap)
     }
   }
 
-  // Make sure we do not get out of order state changes from the monitor
   if (got_old_gw_state && got_new_gw_state) {
     dout(0) << "got_old_gw_state: " << old_gw_state << "got_new_gw_state: " << new_gw_state << dendl;
+    // Make sure we do not get out of order state changes from the monitor
     ceph_assert(new_gw_state.gw_map_epoch >= old_gw_state.gw_map_epoch);
+
+    // If the monitor previously identified this gateway as accessible but now
+    // flags it as unavailable, it suggests that the gateway lost connection
+    // to the monitor.
+    if (old_gw_state.availability == GW_AVAILABILITY_E::GW_AVAILABLE &&
+       new_gw_state.availability == GW_AVAILABILITY_E::GW_UNAVAILABLE) {
+      dout(4) << "Triggering a panic upon disconnection from the monitor, gw state - unavailable" << dendl;
+      throw std::runtime_error("Lost connection to the monitor (gw map unavailable).");
+    }
   }
 
   // Gather all state changes
index 9d84b58397bb09d3bc248342a85449a7012413ae..89c72df38499b9d0c32f10abe1ae4d188bf6bd72 100644 (file)
@@ -63,7 +63,7 @@ void test_MNVMeofGwMap() {
   std::string pool = "pool1";
   std::string group = "grp1";
   std::string gw_id = "GW1";
-  NvmeGwState state(1, 32);
+  NvmeGwState state(1, 32, GW_AVAILABILITY_E::GW_UNAVAILABLE);
   std::string nqn = "nqn";
   ANA_STATE ana_state;
   NqnState nqn_state(nqn, ana_state);