git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/nfs: improve cluster info implementation and fix deployment type logic
author: Shubha Jain <SHUBHA.JAIN1@ibm.com>
Mon, 16 Feb 2026 14:03:01 +0000 (19:33 +0530)
committer: Shubha Jain <SHUBHA.JAIN1@ibm.com>
Thu, 19 Feb 2026 17:00:55 +0000 (22:30 +0530)
- Show placement details and daemon roles in cluster info output
- Add deployment type field showing standalone/active-passive/active-active
- Use orchestrator.DaemonDescriptionStatus.to_str() directly
- Use placement.to_json() for placement field
- Cache get_hosts() to avoid O(n) orchestrator calls
- Optimize ingress service lookup with direct query
- Fix safe access to daemon.ports to prevent IndexError
- Use explicit None checks for port values
- Return empty dict {} for placement instead of None
- Remove unnecessary wrapper methods and comments
- Fix flake8 issues and update tests

Fixes: https://tracker.ceph.com/issues/74239
Fixes: https://tracker.ceph.com/issues/74240
Signed-off-by: Shubha Jain <SHUBHA.JAIN1@ibm.com>
Signed-off-by: Shubha Jain <SHUBHA.JAIN1@ibm.com>
qa/tasks/cephfs/test_nfs.py
src/pybind/mgr/nfs/cluster.py
src/pybind/mgr/nfs/tests/test_nfs.py

index 44f595e7172799669d012bf613fd3f02cd4a0037..07cea069e1788e7144c76b862d624f5c248f4130 100644 (file)
@@ -814,14 +814,18 @@ class TestNFS(MgrTestCase):
         info_output = json.loads(self._nfs_cmd('cluster', 'info', self.cluster_id))
         print(f'info {info_output}')
         info_ip = info_output[self.cluster_id].get('backend', [])[0].pop("ip")
+        # Pop placement since it may vary by test environment
+        info_placement = info_output[self.cluster_id].pop("placement", {})
         host_details = {
             self.cluster_id: {
                 'backend': [
                     {
                         "hostname": self._sys_cmd(['hostname']).decode("utf-8").strip(),
-                        "port": 2049
+                        "port": 2049,
+                        "status": "running"
                     }
                 ],
+                "deployment_type": "standalone",
                 "virtual_ip": None,
             }
         }
index e791d5f050589a875d39a317189095d0ae698d77..79d41e6b7d8c6f530a55aa9525fe40b803cede1c 100644 (file)
@@ -2,7 +2,7 @@ import ipaddress
 import logging
 import re
 import socket
-from typing import cast, Dict, List, Any, Union, Optional, TYPE_CHECKING
+from typing import cast, Dict, List, Any, Optional, TYPE_CHECKING
 
 from mgr_module import NFS_POOL_NAME as POOL_NAME
 from ceph.deployment.service_spec import NFSServiceSpec, PlacementSpec, IngressSpec
@@ -228,56 +228,132 @@ class NFSCluster:
             raise ErrorResponse.wrap(e)
 
     def _show_nfs_cluster_info(self, cluster_id: str) -> Dict[str, Any]:
+        """
+        Retrieve and format NFS cluster information including daemon status,
+        placement, and ingress configuration.
+        """
+        # Get all NFS daemons
         completion = self.mgr.list_daemons(daemon_type='nfs')
-        # Here completion.result is a list DaemonDescription objects
-        clusters = orchestrator.raise_if_exception(completion)
-        backends: List[Dict[str, Union[Any]]] = []
-
-        for cluster in clusters:
-            if cluster_id == cluster.service_id():
-                assert cluster.hostname
-                try:
-                    if cluster.ip:
-                        ip = cluster.ip
-                    else:
-                        c = self.mgr.get_hosts()
-                        orchestrator.raise_if_exception(c)
-                        hosts = [h for h in c.result or []
-                                 if h.hostname == cluster.hostname]
-                        if hosts:
-                            ip = resolve_ip(hosts[0].addr)
-                        else:
-                            # sigh
-                            ip = resolve_ip(cluster.hostname)
-                    backends.append({
-                        "hostname": cluster.hostname,
-                        "ip": ip,
-                        "port": cluster.ports[0] if cluster.ports else None
-                    })
-                except orchestrator.OrchestratorError:
-                    continue
+        all_nfs_daemons = orchestrator.raise_if_exception(completion)
 
-        r: Dict[str, Any] = {
-            'virtual_ip': None,
-            'backend': backends,
-        }
-        sc = self.mgr.describe_service(service_type='ingress')
-        services = orchestrator.raise_if_exception(sc)
-        for i in services:
-            spec = cast(IngressSpec, i.spec)
-            if spec.backend_service == f'nfs.{cluster_id}':
-                r['virtual_ip'] = i.virtual_ip.split('/')[0] if i.virtual_ip else None
-                if i.ports:
-                    r['port'] = i.ports[0]
-                    if len(i.ports) > 1:
-                        r['monitor_port'] = i.ports[1]
+        # Filter daemons for this cluster
+        cluster_daemons = [d for d in all_nfs_daemons if d.service_id() == cluster_id]
+
+        # Cache hosts data to avoid O(n) orchestrator calls
+        hosts_map = {}
+        try:
+            hosts_completion = self.mgr.get_hosts()
+            hosts = orchestrator.raise_if_exception(hosts_completion)
+            hosts_map = {h.hostname: h for h in hosts}
+        except orchestrator.OrchestratorError:
+            log.debug("Failed to get hosts for IP resolution")
+
+        # Determine ingress configuration
+        ingress_mode: Optional[IngressType] = None
+        virtual_ip: Optional[str] = None
+        ingress_port: Optional[int] = None
+        monitor_port: Optional[int] = None
+
+        sc = self.mgr.describe_service(
+            service_type='ingress',
+            service_name=f'ingress.nfs.{cluster_id}'
+        )
+        try:
+            ingress_services = orchestrator.raise_if_exception(sc)
+            if ingress_services:
+                svc = ingress_services[0]
+                spec = cast(IngressSpec, svc.spec)
+                virtual_ip = svc.virtual_ip.split('/')[0] if svc.virtual_ip else None
                 if spec.keepalive_only:
                     ingress_mode = IngressType.keepalive_only
                 elif spec.enable_haproxy_protocol:
                     ingress_mode = IngressType.haproxy_protocol
                 else:
                     ingress_mode = IngressType.haproxy_standard
-                r['ingress_mode'] = ingress_mode.value
+                if svc.ports:
+                    ingress_port = svc.ports[0]
+                    if len(svc.ports) > 1:
+                        monitor_port = svc.ports[1]
+        except orchestrator.OrchestratorError:
+            # No ingress service found for this cluster
+            log.debug(f"No ingress service found for NFS cluster {cluster_id}")
+
+        # Build backend list with daemon information
+        backends: List[Dict[str, Any]] = []
+        for daemon in cluster_daemons:
+            if not daemon.hostname:
+                continue
+
+            try:
+                # Resolve daemon IP
+                if daemon.ip:
+                    ip = daemon.ip
+                elif daemon.hostname in hosts_map:
+                    # Use cached host data
+                    ip = resolve_ip(hosts_map[daemon.hostname].addr)
+                else:
+                    # Fallback to hostname resolution
+                    ip = resolve_ip(daemon.hostname)
+
+                # Get daemon status
+                status = orchestrator.DaemonDescriptionStatus.to_str(daemon.status)
+
+                backends.append({
+                    "hostname": daemon.hostname,
+                    "ip": ip,
+                    "port": daemon.ports[0] if daemon.ports and len(daemon.ports) > 0 else None,
+                    "status": status
+                })
+            except orchestrator.OrchestratorError:
+                log.warning(
+                    "Failed to get info for NFS daemon"
+                    f" on {daemon.hostname} in cluster {cluster_id}")
+                continue
+
+        # Sort backends by hostname for consistent output
+        backends.sort(key=lambda x: x["hostname"])
+
+        # Determine deployment type based on ingress configuration and actual daemon count
+        deployment_type = "standalone"
+        placement = None
+
+        # Get NFS service spec for placement information first
+        nfs_sc = self.mgr.describe_service(
+            service_type='nfs',
+            service_name=f'nfs.{cluster_id}'
+        )
+        nfs_services = orchestrator.raise_if_exception(nfs_sc)
+        for svc in nfs_services:
+            if svc.spec.service_id == cluster_id:
+                placement = svc.spec.placement
+                break
+
+        if ingress_mode:
+            # Determine deployment type from placement spec (source of truth)
+            # Note: Using placement.count instead of len(backends) to avoid race conditions
+            if placement and placement.count and placement.count > 1:
+                deployment_type = "active-active"
+            elif len(backends) > 1:
+                # Fallback to actual daemon count if placement.count not set
+                deployment_type = "active-active"
+            else:
+                deployment_type = "active-passive"
+
+        # Build result dictionary
+        r: Dict[str, Any] = {
+            'deployment_type': deployment_type,
+            'virtual_ip': virtual_ip,
+            'backend': backends,
+            'placement': placement.to_json() if placement else {},
+        }
+
+        # Add ingress configuration to result
+        if ingress_mode:
+            r['ingress_mode'] = ingress_mode.value
+        if ingress_port is not None:
+            r['port'] = ingress_port
+        if monitor_port is not None:
+            r['monitor_port'] = monitor_port
 
         log.debug("Successfully fetched %s info: %s", cluster_id, r)
         return r
index ab8e3c528f01eafb8130f489e06100137808b0fc..fe2687517699fed69c1da0b57f41b74211e0cc69 100644 (file)
@@ -1231,7 +1231,12 @@ NFS_CORE_PARAM {
         cluster = NFSCluster(nfs_mod)
 
         out = cluster.show_nfs_cluster_info(self.cluster_id)
-        assert out == {"foo": {"virtual_ip": None, "backend": []}}
+        assert out == {"foo": {
+            "deployment_type": "standalone",
+            "virtual_ip": None,
+            "backend": [],
+            "placement": {}
+        }}
 
     def test_cluster_info(self):
         self._do_mock_test(self._do_test_cluster_info)