]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
ceph-nvmeof-mon fixes
authorAlexander Indenbaum <aindenba@redhat.com>
Thu, 9 May 2024 11:46:31 +0000 (11:46 +0000)
committerAlexander Indenbaum <aindenba@redhat.com>
Thu, 20 Nov 2025 08:55:27 +0000 (10:55 +0200)
Resolves: rhbz#2279862

- ceph-nvmeof-mon: nvme-gw create/delete
  * move creation to the `daemon_check_post` method to prevent zombie creation.
    this change aligns with the dashboard logic, which uses the same callback.
  * implement `purge` method to ensure that no zombie gateways are left behind
    once the service is removed.
Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
- nvmeof service spec: enable_monitor_client by default
Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
- fix gw stuck in ana-state GW_WAIT_FAILBACK_PREPARED
Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
(cherry picked from commit b0c764b6e22fbb51f59e469d9dd99895c152f73e)

src/mon/NVMeofGwMap.cc
src/pybind/mgr/cephadm/services/nvmeof.py

index 1d656574b8d22c3ec75ac640a8546190ea24064e..f4af5d0caae6d05fbe2c2891c9b82079b4b9a19d 100755 (executable)
@@ -471,10 +471,12 @@ void NVMeofGwMap::fsm_handle_to_expired(const NvmeGwId &gw_id, const NvmeGroupKe
             auto& st = gw_state.second;
             if (st.ana_grp_id == grpid){// group owner
                 grp_owner_found = true;
-                if( ! (fbp_gw_state.last_gw_map_epoch_valid  && st.last_gw_map_epoch_valid) ){
-                   //Timer is not cancelled so it would expire over and over as long as both gws are not updated
-                   dout(1) << "gw " << gw_id  <<" or gw " << gw_state.first  << "map epochs are not updated "<< dendl;
-                   return;
+                if(st.availability == GW_AVAILABILITY_E::GW_AVAILABLE) {
+                   if( ! (fbp_gw_state.last_gw_map_epoch_valid  && st.last_gw_map_epoch_valid) ){
+                     //Timer is not cancelled so it would expire over and over as long as both gws are not updated
+                     dout(1) << "gw " << gw_id  <<" or gw " << gw_state.first  << "map epochs are not updated "<< dendl;
+                     return;
+                   }
                 }
                 cancel_timer(gw_id, group_key, grpid);
                 if (st.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_OWNER_WAIT_FAILBACK_PREPARED && st.availability == GW_AVAILABILITY_E::GW_AVAILABLE )
index 454f5b8c11a5b35e493a22affa8f310733496b2b..ac01bc7c4b5c178edae061145d49edc55fb59622 100644 (file)
@@ -56,21 +56,44 @@ class NvmeofService(CephService):
         daemon_spec.extra_files = {'ceph-nvmeof.conf': gw_conf}
         daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
         daemon_spec.deps = []
-        # Notify monitor about this gateway creation
-        cmd = {
-            'prefix': 'nvme-gw create',
-            'id': name,
-            'group': spec.group,
-            'pool': spec.pool
-        }
-        _, _, err = self.mgr.mon_command(cmd)
-        # if send command failed, raise assertion exception, failing the daemon creation
-        assert not err, f"Unable to send monitor command {cmd}, error {err}"
         if not hasattr(self, 'gws'):
             self.gws = {} # id -> name map of gateways for this service.
         self.gws[nvmeof_gw_id] = name # add to map of service's gateway names
         return daemon_spec
 
+    def daemon_check_post(self, daemon_descrs: List[DaemonDescription]) -> None:
+        """ Overrides the daemon_check_post to add nvmeof gateways safely
+        """
+        self.mgr.log.info(f"nvmeof daemon_check_post {daemon_descrs}")
+        # Assert configured
+        assert self.pool
+        assert self.group is not None
+        # self.gws is created lazily in prepare_create, so it may not exist
+        # yet in this mgr incarnation (e.g. right after a mgr restart).
+        if not hasattr(self, 'gws'):
+            self.gws = {}
+        for dd in daemon_descrs:
+            self.mgr.log.info(f"nvmeof daemon_descr {dd}")
+            name = self.gws.get(dd.daemon_id)
+            if name is None:
+                # Unknown daemon (in-memory map is lost on mgr failover):
+                # skip it instead of asserting and crashing the caller.
+                self.mgr.log.warning(f"nvmeof daemon {dd.daemon_id} not in gws map, skipping")
+                continue
+            self.mgr.log.info(f"nvmeof daemon name={name}")
+            # Notify monitor about this gateway creation
+            cmd = {
+                'prefix': 'nvme-gw create',
+                'id': name,
+                'group': self.group,
+                'pool': self.pool
+            }
+            self.mgr.log.info(f"create gateway: monitor command {cmd}")
+            _, _, err = self.mgr.mon_command(cmd)
+            if err:
+                self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}")
+        super().daemon_check_post(daemon_descrs)
+
     def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
         # TODO: what integration do we need with the dashboard?
         pass
@@ -124,12 +139,28 @@ class NvmeofService(CephService):
             'group': self.group,
             'pool': self.pool
         }
+        self.mgr.log.info(f"delete gateway: monitor command {cmd}")
         _, _, err = self.mgr.mon_command(cmd)
         if err:
             self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}")
 
     def purge(self, service_name: str) -> None:
-        """Removes configuration
+        """Make sure no zombie gateway is left behind
         """
-        #  TODO: what should we purge in this case (if any)?
-        pass
+        # Assert configured
+        assert self.pool
+        assert self.group is not None
+        for daemon_id in self.gws:
+            name = self.gws[daemon_id]
+            self.gws.pop(daemon_id)
+            # Notify monitor about this gateway deletion
+            cmd = {
+                'prefix': 'nvme-gw delete',
+                'id': name,
+                'group': self.group,
+                'pool': self.pool
+            }
+            self.mgr.log.info(f"purge delete gateway: monitor command {cmd}")
+            _, _, err = self.mgr.mon_command(cmd)
+            if err:
+                self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}")