From 0ba570a46e63231e303de17540c898a294daff33 Mon Sep 17 00:00:00 2001
From: Adam King
Date: Sun, 15 Jan 2023 17:18:47 -0500
Subject: [PATCH] mgr/cephadm: fix haproxy nfs backend server ip gathering

Fixes: https://tracker.ceph.com/issues/58465

Previously, if there were two nfs daemons of the same rank, we did not
check the rank_generation, which is intended to mark which one is the
"real" one of that rank in cases where we cannot remove the other one
due to its host being offline. The nfs of a given rank with the highest
rank_generation is the one we want haproxy to use for its backend IP.
Since we didn't actually check this, which IP we got was effectively
random, depending on the order in which we happened to iterate over the
nfs daemons of the same rank. If the nfs with the lower rank_generation
on an offline host happened to come later in the iteration, we'd use
that one for the IP, which is incorrect.

Signed-off-by: Adam King
(cherry picked from commit e0dd6cac0229e75feb85b10bb31a70e4638733cc)
---
 src/pybind/mgr/cephadm/services/ingress.py    |  40 +++++-
 src/pybind/mgr/cephadm/tests/test_services.py | 120 +++++++++++++++++-
 2 files changed, 152 insertions(+), 8 deletions(-)

diff --git a/src/pybind/mgr/cephadm/services/ingress.py b/src/pybind/mgr/cephadm/services/ingress.py
index 10f3609e6748..93f2acc4429c 100644
--- a/src/pybind/mgr/cephadm/services/ingress.py
+++ b/src/pybind/mgr/cephadm/services/ingress.py
@@ -6,8 +6,8 @@ from typing import List, Dict, Any, Tuple, cast, Optional
 
 from ceph.deployment.service_spec import IngressSpec
 from mgr_util import build_url
-from cephadm.utils import resolve_ip
-from orchestrator import OrchestratorError
+from cephadm import utils
+from orchestrator import OrchestratorError, DaemonDescription
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephService
 
 logger = logging.getLogger(__name__)
@@ -87,7 +87,33 @@ class IngressService(CephService):
 
         if backend_spec.service_type == 'nfs':
             mode = 'tcp'
-            by_rank = {d.rank: d for d in daemons if d.rank is not None}
+            # we need to get the nfs daemon with the highest rank_generation for
+            # each rank we are currently deploying for the haproxy config.
+            # for example, if we had three (rank, rank_generation) pairs of
+            # (0, 0), (0, 1), (1, 0), we would want the nfs daemons corresponding
+            # to (0, 1) and (1, 0), because those are the two with the highest
+            # rank_generation for the existing ranks (0 and 1, with the highest
+            # rank_generation for rank 0 being 1 and the highest rank_generation
+            # for rank 1 being 0)
+            ranked_daemons = [d for d in daemons if (d.rank is not None and d.rank_generation is not None)]
+            by_rank: Dict[int, DaemonDescription] = {}
+            for d in ranked_daemons:
+                # It doesn't seem like mypy can figure out that rank
+                # and rank_generation, for both the daemon we're looping on
+                # and all those in by_rank, cannot be None due to the filtering
+                # done when creating the ranked_daemons list, which is why these
+                # seemingly unnecessary assertions are here.
+                assert d.rank is not None
+                if d.rank not in by_rank:
+                    by_rank[d.rank] = d
+                else:
+                    same_rank_nfs = by_rank[d.rank]
+                    assert d.rank_generation is not None
+                    assert same_rank_nfs.rank_generation is not None
+                    # if we have multiple of the same rank, take the one
+                    # with the highest rank generation
+                    if d.rank_generation > same_rank_nfs.rank_generation:
+                        by_rank[d.rank] = d
             servers = []
 
             # try to establish how many ranks we *should* have
@@ -101,7 +127,7 @@ class IngressService(CephService):
                     assert d.ports
                     servers.append({
                         'name': f"{spec.backend_service}.{rank}",
-                        'ip': d.ip or resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
+                        'ip': d.ip or utils.resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
                         'port': d.ports[0],
                     })
                 else:
@@ -116,7 +142,7 @@ class IngressService(CephService):
             servers = [
                 {
                     'name': d.name(),
-                    'ip': d.ip or resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
+                    'ip': d.ip or utils.resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
                     'port': d.ports[0],
                 } for d in daemons if d.ports
             ]
@@ -271,7 +297,7 @@ class IngressService(CephService):
         # other_ips in conf file and converter to ips
         if host in hosts:
             hosts.remove(host)
-        other_ips = [resolve_ip(self.mgr.inventory.get_addr(h)) for h in hosts]
+        other_ips = [utils.resolve_ip(self.mgr.inventory.get_addr(h)) for h in hosts]
 
         keepalived_conf = self.mgr.template.render(
             'services/ingress/keepalived.conf.j2',
@@ -284,7 +310,7 @@ class IngressService(CephService):
                 'states': states,
                 'priorities': priorities,
                 'other_ips': other_ips,
-                'host_ip': resolve_ip(self.mgr.inventory.get_addr(host)),
+                'host_ip': utils.resolve_ip(self.mgr.inventory.get_addr(host)),
             }
         )
 
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index 646fb4fed823..8f1bc338ba1a 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -17,9 +17,11 @@ from cephadm.services.monitoring import GrafanaService, AlertmanagerService, Pro
 from cephadm.services.exporter import CephadmExporter
 from cephadm.module import CephadmOrchestrator
 from ceph.deployment.service_spec import IscsiServiceSpec, MonitoringSpec, AlertManagerSpec, \
-    ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec
+    ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec, NFSServiceSpec
 from cephadm.tests.fixtures import with_host, with_service, _run_cephadm
 
+from ceph.utils import datetime_now
+
 from orchestrator import OrchestratorError
 from orchestrator._interface import DaemonDescription
 
@@ -816,6 +818,122 @@ class TestSNMPGateway:
 
 class TestIngressService:
 
+    @patch("cephadm.inventory.Inventory.get_addr")
+    @patch("cephadm.utils.resolve_ip")
+    @patch("cephadm.inventory.HostCache.get_daemons_by_service")
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    def test_ingress_config_nfs_multiple_nfs_same_rank(self, _run_cephadm, _get_daemons_by_service, _resolve_ip, _get_addr, cephadm_module: CephadmOrchestrator):
+        _run_cephadm.return_value = ('{}', '', 0)
+
+        def fake_resolve_ip(hostname: str) -> str:
+            if hostname == 'host1':
+                return '192.168.122.111'
+            elif hostname == 'host2':
+                return '192.168.122.222'
+            else:
+                return 'xxx.xxx.xxx.xxx'
+        _resolve_ip.side_effect = fake_resolve_ip
+
+        def fake_get_addr(hostname: str) -> str:
+            return hostname
+        _get_addr.side_effect = fake_get_addr
+
+        nfs_service = NFSServiceSpec(service_id="foo", placement=PlacementSpec(count=1, hosts=['host1', 'host2']),
+                                     port=12049)
+
+        ispec = IngressSpec(service_type='ingress',
+                            service_id='nfs.foo',
+                            backend_service='nfs.foo',
+                            frontend_port=2049,
+                            monitor_port=9049,
+                            virtual_ip='192.168.122.100/24',
+                            monitor_user='admin',
+                            monitor_password='12345',
+                            keepalived_password='12345')
+
+        cephadm_module.spec_store._specs = {
+            'nfs.foo': nfs_service,
+            'ingress.nfs.foo': ispec
+        }
+        cephadm_module.spec_store.spec_created = {
+            'nfs.foo': datetime_now(),
+            'ingress.nfs.foo': datetime_now()
+        }
+
+        # in both test cases we run here, we want only the ip
+        # for the host1 nfs daemon, as we'll end up giving that
+        # one a higher rank_generation but the same rank as the one
+        # on host2
+        haproxy_expected_conf = {
+            'files':
+                {
+                    'haproxy.cfg':
+                        '# This file is generated by cephadm.\n'
+                        'global\n'
+                        '    log 127.0.0.1 local2\n'
+                        '    chroot /var/lib/haproxy\n'
+                        '    pidfile /var/lib/haproxy/haproxy.pid\n'
+                        '    maxconn 8000\n'
+                        '    daemon\n'
+                        '    stats socket /var/lib/haproxy/stats\n\n'
+                        'defaults\n'
+                        '    mode tcp\n'
+                        '    log global\n'
+                        '    timeout queue 1m\n'
+                        '    timeout connect 10s\n'
+                        '    timeout client 1m\n'
+                        '    timeout server 1m\n'
+                        '    timeout check 10s\n'
+                        '    maxconn 8000\n\n'
+                        'frontend stats\n'
+                        '    mode http\n'
+                        '    bind 192.168.122.100:9049\n'
+                        '    bind localhost:9049\n'
+                        '    stats enable\n'
+                        '    stats uri /stats\n'
+                        '    stats refresh 10s\n'
+                        '    stats auth admin:12345\n'
+                        '    http-request use-service prometheus-exporter if { path /metrics }\n'
+                        '    monitor-uri /health\n\n'
+                        'frontend frontend\n'
+                        '    bind 192.168.122.100:2049\n'
+                        '    default_backend backend\n\n'
+                        'backend backend\n'
+                        '    mode tcp\n'
+                        '    balance source\n'
+                        '    hash-type consistent\n'
+                        '    server nfs.foo.0 192.168.122.111:12049\n'
+                }
+        }
+
+        # verify we get the same cfg regardless of the order in which the nfs daemons are returned.
+        # in this case both nfs are rank 0, so it should only take the one with rank_generation 1, i.e.
+        # the one on host1
+        nfs_daemons = [
+            DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049]),
+            DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049])
+        ]
+        _get_daemons_by_service.return_value = nfs_daemons
+
+        haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+            CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name()))
+
+        assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+        # swapping the order now; it should still pick out the one with the higher rank_generation.
+        # in this case both nfs are rank 0, so it should only take the one with rank_generation 1, i.e.
+        # the one on host1
+        nfs_daemons = [
+            DaemonDescription(daemon_type='nfs', daemon_id='foo.0.0.host2.abcdef', hostname='host2', rank=0, rank_generation=0, ports=[12049]),
+            DaemonDescription(daemon_type='nfs', daemon_id='foo.0.1.host1.qwerty', hostname='host1', rank=0, rank_generation=1, ports=[12049])
+        ]
+        _get_daemons_by_service.return_value = nfs_daemons
+
+        haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+            CephadmDaemonDeploySpec(host='host1', daemon_id='ingress', service_name=ispec.service_name()))
+
+        assert haproxy_generated_conf[0] == haproxy_expected_conf
+
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_ingress_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.return_value = ('{}', '', 0)
-- 
2.47.3
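
For readers who want to try the "highest rank_generation per rank" rule outside of cephadm,
below is a minimal standalone sketch of the selection logic this patch implements. The
RankedDaemon tuple, pick_backends helper, and sample data are hypothetical stand-ins for
cephadm's DaemonDescription, not the real class; only the keep-the-highest-generation
grouping mirrors the patched code.

    from collections import namedtuple
    from typing import Dict, List

    # Hypothetical stand-in for orchestrator.DaemonDescription, reduced to
    # the fields the selection rule actually needs.
    RankedDaemon = namedtuple('RankedDaemon', ['name', 'rank', 'rank_generation'])

    def pick_backends(daemons: List[RankedDaemon]) -> Dict[int, RankedDaemon]:
        """For each rank, keep the daemon with the highest rank_generation."""
        by_rank: Dict[int, RankedDaemon] = {}
        for d in daemons:
            # skip unranked daemons, as the patched code does
            if d.rank is None or d.rank_generation is None:
                continue
            current = by_rank.get(d.rank)
            if current is None or d.rank_generation > current.rank_generation:
                by_rank[d.rank] = d
        return by_rank

    # the (rank, rank_generation) pairs (0, 0), (0, 1), (1, 0) from the commit
    # message: (0, 1) and (1, 0) should win, regardless of input order
    daemons = [
        RankedDaemon('nfs.foo.0.0', 0, 0),
        RankedDaemon('nfs.foo.0.1', 0, 1),
        RankedDaemon('nfs.foo.1.0', 1, 0),
    ]
    assert pick_backends(daemons) == pick_backends(list(reversed(daemons)))
    assert pick_backends(daemons)[0].name == 'nfs.foo.0.1'
    assert pick_backends(daemons)[1].name == 'nfs.foo.1.0'

The order-independence assertion is exactly the property the pre-patch dict comprehension
lacked, and is what the two swapped-order test cases above check end to end through the
generated haproxy config.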