]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/NVMeofGw*: 59667/head
author Leonid Chernin <leonidc@il.ibm.com>
Sun, 8 Sep 2024 11:48:36 +0000 (11:48 +0000)
committer Leonid Chernin <leonidc@il.ibm.com>
Mon, 16 Sep 2024 13:38:23 +0000 (13:38 +0000)
 Fix an issue where a GW was marked down (unavailable) when its last subsystem was deleted

Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMap.h
src/mon/NVMeofGwMon.cc
src/mon/NVMeofGwMon.h

index 7d886344244ee6a922caa8c47b92881582b83414..d60d3edefd2d59f07d80f298e12d751d9d6eb206 100755 (executable)
@@ -247,6 +247,30 @@ void NVMeofGwMap::track_deleting_gws(const NvmeGroupKey& group_key,
   }
 }
 
+// GW has no subsystems configured: mark it GW_CREATED and run the no-subsystems
+// FSM handler per ANA group. Returns 0, or -EINVAL if gw_id is not in the map.
+int NVMeofGwMap::process_gw_map_gw_no_subsystems(
+  const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending)
+{
+  auto& gws_states = created_gws[group_key];
+  auto gw_state = gws_states.find(gw_id);
+  if (gw_state == gws_states.end()) {
+    dout(1)  << __FUNCTION__ << "ERROR GW-id was not found in the map "
+         << gw_id << dendl;
+    return -EINVAL;
+  }
+  dout(10) << "GW- no subsystems configured " << gw_id << dendl;
+  auto& st = gw_state->second;
+  st.availability = gw_availability_t::GW_CREATED;
+  for (auto& state_itr: st.sm_state) {  // per-ANA-group states of the GW found above
+    fsm_handle_gw_no_subsystems(
+      gw_id, group_key, state_itr.second, state_itr.first, propose_pending);
+  }
+  propose_pending = true; // map should reflect that gw becomes Created
+  validate_gw_map(group_key);
+  return 0;
+}
+
 int NVMeofGwMap::process_gw_map_gw_down(
   const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending)
 {
@@ -263,7 +287,7 @@ int NVMeofGwMap::process_gw_map_gw_down(
        state_itr.first, propose_pending);
       state_itr.second = gw_states_per_group_t::GW_STANDBY_STATE;
     }
-    propose_pending = true; // map should reflect that gw becames unavailable
+    propose_pending = true; // map should reflect that gw becomes Unavailable
     if (propose_pending) validate_gw_map(group_key);
   } else {
     dout(1)  << __FUNCTION__ << "ERROR GW-id was not found in the map "
@@ -615,6 +639,59 @@ void NVMeofGwMap::fsm_handle_gw_alive(
   }
 }
 
+// FSM step for one ANA group (grpid) of a GW that has no subsystems configured.
+// `state` is that GW's current state for grpid, passed by value.
+// map_modified is raised only on the two WAIT_* transitions below.
+void NVMeofGwMap::fsm_handle_gw_no_subsystems(
+    const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
+    gw_states_per_group_t state, NvmeAnaGrpId grpid,  bool &map_modified)
+{
+  switch (state) {
+  case gw_states_per_group_t::GW_STANDBY_STATE:
+  case gw_states_per_group_t::GW_IDLE_STATE:
+    break; // no action needed in these states
+
+  case gw_states_per_group_t::GW_WAIT_BLOCKLIST_CMPL:
+    // cancel the pending timer and return this GW's group to Standby
+    cancel_timer(gw_id, group_key, grpid);
+    created_gws[group_key][gw_id].standby_state(grpid);
+    map_modified = true;
+    break;
+
+  case gw_states_per_group_t::GW_WAIT_FAILBACK_PREPARED:
+  {
+    cancel_timer(gw_id, group_key, grpid);
+    map_modified = true;
+    // put back to Standby the first GW of the group that is waiting to own grpid
+    for (auto& cand: created_gws[group_key]) {
+      auto& cand_st = cand.second;
+      if (cand_st.sm_state[grpid] !=
+          gw_states_per_group_t::GW_OWNER_WAIT_FAILBACK_PREPARED) {
+        continue;
+      }
+      dout(4) << "Warning: Outgoing Failback when GW is without subsystems"
+              << " - to rollback it" <<" GW " << gw_id << "for ANA Group "
+              << grpid << dendl;
+      cand_st.standby_state(grpid);
+      break;
+    }
+    break;
+  }
+
+  case gw_states_per_group_t::GW_OWNER_WAIT_FAILBACK_PREPARED:
+  case gw_states_per_group_t::GW_ACTIVE_STATE:
+    // this GW's group goes straight to Standby; no timer is cancelled here
+    dout(4) << "Set state to Standby for GW " << gw_id << " group "
+            << grpid << dendl;
+    created_gws[group_key][gw_id].standby_state(grpid);
+    break;
+
+  default:
+    dout(4) << "Error : Invalid state " << state << "for GW " << gw_id  << dendl;
+    break;
+  }
+}
+
 void NVMeofGwMap::fsm_handle_gw_down(
   const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
   gw_states_per_group_t state, NvmeAnaGrpId grpid,  bool &map_modified)
index 4c9d796641018974328eda88645ab1d5a4f280f5..29710371742185d4b432f42c6f93ba73af41bbba 100755 (executable)
@@ -54,6 +54,9 @@ public:
   int process_gw_map_gw_down(
     const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
     bool &propose_pending);
+  int process_gw_map_gw_no_subsystems(
+    const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
+    bool &propose_pending);
   void update_active_timers(bool &propose_pending);
   void handle_abandoned_ana_groups(bool &propose_pending);
   void handle_removed_subsystems(
@@ -77,6 +80,9 @@ private:
   void fsm_handle_gw_down(
     const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
     gw_states_per_group_t state, NvmeAnaGrpId grpid,  bool &map_modified);
+  void fsm_handle_gw_no_subsystems(
+     const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
+     gw_states_per_group_t state, NvmeAnaGrpId grpid,  bool &map_modified);
   void fsm_handle_gw_delete(
     const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
     gw_states_per_group_t state, NvmeAnaGrpId grpid,  bool &map_modified);
index b6faeb2e97ce5ee8e22e7db6f9c3a916eb82a037..544ad6747229526434818d6cd665a29e43060af6 100644 (file)
@@ -432,7 +432,8 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
       if (rc == 0) {
         bool propose = false;
         // Simulate  immediate Failover of this GW
-        process_gw_down(id, group_key, propose);
+        process_gw_down(id, group_key, propose,
+           gw_availability_t::GW_UNAVAILABLE);
       } else if (rc == -EINVAL) {
        dout (4) << "Error: GW not found in the database " << id << " "
                 << pool << " " << group << "  rc " << rc << dendl;
@@ -462,13 +463,19 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
 }
 
 void NVMeofGwMon::process_gw_down(const NvmeGwId &gw_id,
-   const NvmeGroupKey& group_key, bool &propose_pending)
+   const NvmeGroupKey& group_key, bool &propose_pending,
+   gw_availability_t avail)  // selects which pending_map handler runs below
 {
   LastBeacon lb = {gw_id, group_key};
   auto it = last_beacon.find(lb);
   if (it != last_beacon.end()) {
     last_beacon.erase(it);
-    pending_map.process_gw_map_gw_down(gw_id, group_key, propose_pending);
+    if (avail == gw_availability_t::GW_UNAVAILABLE) {  // beacon entry was found and erased above
+      pending_map.process_gw_map_gw_down(gw_id, group_key, propose_pending);
+    } else {  // any other value; visible callers pass only GW_UNAVAILABLE or GW_CREATED
+      pending_map.process_gw_map_gw_no_subsystems(gw_id, group_key, propose_pending);
+    }
+
   }
 }
 
@@ -581,7 +588,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
   }
 
   if (sub.size() == 0) {
-    avail = gw_availability_t::GW_UNAVAILABLE;
+    avail = gw_availability_t::GW_CREATED;
   }
   if (pending_map.created_gws[group_key][gw_id].subsystems != sub) {
     dout(10) << "subsystems of GW changed, propose pending " << gw_id << dendl;
@@ -607,8 +614,9 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
     epoch_t last_osd_epoch = m->get_last_osd_epoch();
     pending_map.process_gw_map_ka(gw_id, group_key, last_osd_epoch, propose);
   // state set by GW client application
-  } else if (avail == gw_availability_t::GW_UNAVAILABLE) {
-      process_gw_down(gw_id, group_key, propose);
+  } else if (avail == gw_availability_t::GW_UNAVAILABLE ||
+      avail == gw_availability_t::GW_CREATED) {
+      process_gw_down(gw_id, group_key, propose, avail);
   }
   // Periodic: check active FSM timers
   pending_map.update_active_timers(timer_propose);
index f132c87d92af70448a81ae580072c1fd66e42207..7fae8b766a5e7fe46d0bd6f4b73eb2f80bf0ff59 100644 (file)
@@ -85,7 +85,8 @@ public:
 private:
   void synchronize_last_beacon();
   void process_gw_down(const NvmeGwId &gw_id,
-     const NvmeGroupKey& group_key, bool &propose_pending);
+     const NvmeGroupKey& group_key, bool &propose_pending,
+     gw_availability_t avail);
 };
 
 #endif /* MON_NVMEGWMONITOR_H_ */