From 0fbf12c9d17a98bf4315b0bff81c0aeb38458e0f Mon Sep 17 00:00:00 2001
From: Adam King
Date: Wed, 5 Aug 2020 17:04:46 -0400
Subject: [PATCH] mgr/cephadm: mgr or mds scale-down should prefer non-active
 daemons

When removing daemons during a mgr/mds scale-down, prefer to remove
standby daemons so the active daemon is not killed.

Fixes: https://tracker.ceph.com/issues/44252

Signed-off-by: Adam King
---
 src/pybind/mgr/cephadm/module.py                   |   6 +
 src/pybind/mgr/cephadm/schedule.py                 |  28 +++-
 .../mgr/cephadm/services/cephadmservice.py         |  27 +++-
 src/pybind/mgr/cephadm/tests/test_cephadm.py       |   3 +-
 .../mgr/cephadm/tests/test_scheduling.py           | 153 ++++++++++++++++++
 src/pybind/mgr/cephadm/tests/test_spec.py          |  30 ++--
 src/pybind/mgr/orchestrator/_interface.py          |   6 +-
 .../orchestrator/tests/test_orchestrator.py        |   1 +
 8 files changed, 238 insertions(+), 16 deletions(-)

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index cbbdb5d819aad..07e6c91c3c487 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -2135,6 +2135,12 @@ you may want to run:
             # These daemon types require additional configs after creation
             if dd.daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'nfs']:
                 daemons_post[dd.daemon_type].append(dd)
+
+            if self.cephadm_services[dd.daemon_type].get_active_daemon(
+                    self.cache.get_daemons_by_service(dd.service_name())).daemon_id == dd.daemon_id:
+                dd.is_active = True
+            else:
+                dd.is_active = False
 
             deps = self._calc_daemon_deps(dd.daemon_type, dd.daemon_id)
             last_deps, last_config = self.cache.get_daemon_last_config_deps(
diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py
index 81f6a5e029fda..17fbe6bcb2476 100644
--- a/src/pybind/mgr/cephadm/schedule.py
+++ b/src/pybind/mgr/cephadm/schedule.py
@@ -130,8 +130,7 @@ class HostAssignment(object):
 
         # we don't need any additional hosts
         if need < 0:
-            # ask the scheduler to return a set of hosts with a up to the value of <count>
-            return self.scheduler.place(hosts_with_daemons, count)
+            return self.prefer_hosts_with_active_daemons(hosts_with_daemons, count)
         else:
             # exclusive to 'mon' daemons. Filter out hosts that don't have a public network assigned
             if self.filter_new_host:
@@ -146,6 +145,30 @@ class HostAssignment(object):
         # if a host already has the anticipated daemon, merge it with the candidates
         # to get a list of HostPlacementSpec that can be deployed on.
         return list(merge_hostspecs(hosts_with_daemons, others))
+
+    def get_hosts_with_active_daemon(self, hosts: List[HostPlacementSpec]) -> List[HostPlacementSpec]:
+        active_hosts: List['HostPlacementSpec'] = []
+        for daemon in self.daemons:
+            if daemon.is_active:
+                for h in hosts:
+                    if h.hostname == daemon.hostname:
+                        active_hosts.append(h)
+        # remove duplicates before returning
+        return list(dict.fromkeys(active_hosts))
+
+    def prefer_hosts_with_active_daemons(self, hosts: List[HostPlacementSpec], count: int) -> List[HostPlacementSpec]:
+        # try to prefer hosts with an active daemon if possible
+        active_hosts = self.get_hosts_with_active_daemon(hosts)
+        if len(active_hosts) != 0 and count > 0:
+            for host in active_hosts:
+                hosts.remove(host)
+            if len(active_hosts) >= count:
+                return self.scheduler.place(active_hosts, count)
+            else:
+                return list(merge_hostspecs(self.scheduler.place(active_hosts, count),
+                                            self.scheduler.place(hosts, count - len(active_hosts))))
+        # ask the scheduler to return a set of hosts of size up to 'count'
+        return self.scheduler.place(hosts, count)
 
     def add_daemon_hosts(self, host_pool: List[HostPlacementSpec]) -> Set[HostPlacementSpec]:
         hosts_with_daemons = {d.hostname for d in self.daemons}
@@ -227,4 +250,3 @@ def difference_hostspecs(l: List[HostPlacementSpec], r: List[HostPlacementSpec])
 
     r_names = {h.hostname for h in r}
     return [h for h in l if h.hostname not in r_names]
-
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index e513e809f96c0..ec4cba0bbb323 100644
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -110,7 +110,9 @@ class CephadmService(metaclass=ABCMeta):
         raise NotImplementedError()
 
     def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
-        raise NotImplementedError()
+        # if this is called for a service type where it hasn't explicitly been
+        # defined, return an empty DaemonDescription
+        return DaemonDescription()
 
     def _inventory_get_addr(self, hostname: str) -> str:
         """Get a host's address with its hostname."""
@@ -324,6 +326,14 @@ class MgrService(CephadmService):
         daemon_spec.keyring = keyring
 
         return self.mgr._create_daemon(daemon_spec)
 
+    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+        active_mgr_str = self.mgr.get('mgr_map')['active_name']
+        for daemon in daemon_descrs:
+            if daemon.daemon_id == active_mgr_str:
+                return daemon
+        # if no active mgr found, return an empty DaemonDescription
+        return DaemonDescription()
+
 
 class MdsService(CephadmService):
@@ -356,6 +366,21 @@ class MdsService(CephadmService):
         daemon_spec.keyring = keyring
 
         return self.mgr._create_daemon(daemon_spec)
 
+    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+        active_mds_strs = list()
+        for fs in self.mgr.get('fs_map')['filesystems']:
+            mds_map = fs['mdsmap']
+            if mds_map is not None:
+                for mds_id, mds_status in mds_map['info'].items():
+                    if mds_status['state'] == 'up:active':
+                        active_mds_strs.append(mds_status['name'])
+        if len(active_mds_strs) != 0:
+            for daemon in daemon_descrs:
+                if daemon.daemon_id in active_mds_strs:
+                    return daemon
+        # if no active mds found, return an empty DaemonDescription
+        return DaemonDescription()
+
 
 class RgwService(CephadmService):
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 1e9633173a6a2..d1072d4f250b5 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -114,7 +114,8 @@ class TestCephadm(object):
                 'daemon_type': 'mds',
                 'hostname': 'test',
                 'status': 1,
-                'status_desc': 'starting'}
+                'status_desc': 'starting',
+                'is_active': False}
             ]
 
         with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'), CephadmOrchestrator.apply_rgw, 'test'):
diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py
index 806625934bf60..76807f8fccad7 100644
--- a/src/pybind/mgr/cephadm/tests/test_scheduling.py
+++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py
@@ -625,3 +625,156 @@ def test_bad_specs(service_type, placement, hosts, daemons, expected):
             get_hosts_func=get_hosts_func,
             get_daemons_func=lambda _: daemons).place()
     assert str(e.value) == expected
+
+class ActiveAssignmentTest(NamedTuple):
+    service_type: str
+    placement: PlacementSpec
+    hosts: List[str]
+    daemons: List[DaemonDescription]
+    expected: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected",
+                         [
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=2),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1', is_active=True),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3'),
+                                 ],
+                                 [['host1', 'host2'], ['host1', 'host3']]
+                             ),
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=2),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [['host1', 'host3'], ['host2', 'host3']]
+                             ),
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=1),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2', is_active=True),
+                                     DaemonDescription('mgr', 'c', 'host3'),
+                                 ],
+                                 [['host2']]
+                             ),
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=1),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [['host3']]
+                             ),
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=1),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1', is_active=True),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [['host1'], ['host3']]
+                             ),
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=2),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2', is_active=True),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [['host2', 'host3']]
+                             ),
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=1),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1', is_active=True),
+                                     DaemonDescription('mgr', 'b', 'host2', is_active=True),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [['host1'], ['host2'], ['host3']]
+                             ),
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=1),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1', is_active=True),
+                                     DaemonDescription('mgr', 'a2', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3'),
+                                 ],
+                                 [['host1']]
+                             ),
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=1),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1', is_active=True),
+                                     DaemonDescription('mgr', 'a2', 'host1', is_active=True),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3'),
+                                 ],
+                                 [['host1']]
+                             ),
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=2),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1', is_active=True),
+                                     DaemonDescription('mgr', 'a2', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [['host1', 'host3']]
+                             ),
+                             # Explicit placement should override preference for active daemon
+                             ActiveAssignmentTest(
+                                 'mgr',
+                                 PlacementSpec(count=1, hosts=['host1']),
+                                 'host1 host2 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [['host1']]
+                             ),
+
+                         ])
+def test_active_assignment(service_type, placement, hosts, daemons, expected):
+    def get_hosts_func(label=None, as_hostspec=False):
+        if as_hostspec:
+            return [HostSpec(h) for h in hosts]
+        return hosts
+
+    spec = ServiceSpec(service_type=service_type,
+                       service_id=None,
+                       placement=placement)
+
+    hosts = HostAssignment(
+        spec=spec,
+        get_hosts_func=get_hosts_func,
+        get_daemons_func=lambda _: daemons).place()
+    assert sorted([h.hostname for h in hosts]) in expected
diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py
index 41b33ac9f185c..2fa190a814fb9 100644
--- a/src/pybind/mgr/cephadm/tests/test_spec.py
+++ b/src/pybind/mgr/cephadm/tests/test_spec.py
@@ -122,7 +122,8 @@ def test_spec_octopus(spec_json):
     "status_desc": "running",
     "last_refresh": "2020-04-03T15:31:48.725856",
     "created": "2020-04-02T19:23:08.829543",
-    "started": "2020-04-03T07:29:16.932838"
+    "started": "2020-04-03T07:29:16.932838",
+    "is_active": false
   },
   {
     "hostname": "ceph-001",
@@ -136,7 +137,8 @@ def test_spec_octopus(spec_json):
     "status_desc": "running",
     "last_refresh": "2020-04-03T15:31:48.725903",
     "created": "2020-04-02T19:23:11.390694",
-    "started": "2020-04-03T07:29:16.910897"
+    "started": "2020-04-03T07:29:16.910897",
+    "is_active": false
   },
   {
     "hostname": "ceph-001",
@@ -150,7 +152,8 @@ def test_spec_octopus(spec_json):
     "status_desc": "running",
     "last_refresh": "2020-04-03T15:31:48.725950",
     "created": "2020-04-02T19:23:52.025088",
-    "started": "2020-04-03T07:29:16.847972"
+    "started": "2020-04-03T07:29:16.847972",
+    "is_active": false
   },
   {
     "hostname": "ceph-001",
@@ -164,7 +167,8 @@ def test_spec_octopus(spec_json):
     "status_desc": "running",
     "last_refresh": "2020-04-03T15:31:48.725807",
     "created": "2020-04-02T19:22:18.648584",
-    "started": "2020-04-03T07:29:16.856153"
+    "started": "2020-04-03T07:29:16.856153",
+    "is_active": false
   },
   {
     "hostname": "ceph-001",
@@ -178,7 +182,8 @@ def test_spec_octopus(spec_json):
     "status_desc": "running",
     "last_refresh": "2020-04-03T15:31:48.725715",
     "created": "2020-04-02T19:22:13.863300",
-    "started": "2020-04-03T07:29:17.206024"
+    "started": "2020-04-03T07:29:17.206024",
+    "is_active": false
   },
   {
     "hostname": "ceph-001",
@@ -192,7 +197,8 @@ def test_spec_octopus(spec_json):
     "status_desc": "running",
     "last_refresh": "2020-04-03T15:31:48.725996",
     "created": "2020-04-02T19:23:53.880197",
-    "started": "2020-04-03T07:29:16.880044"
+    "started": "2020-04-03T07:29:16.880044",
+    "is_active": false
   },
   {
     "hostname": "ceph-001",
@@ -206,7 +212,8 @@ def test_spec_octopus(spec_json):
     "status_desc": "running",
     "last_refresh": "2020-04-03T15:31:48.726088",
     "created": "2020-04-02T20:35:02.991435",
-    "started": "2020-04-03T07:29:19.373956"
+    "started": "2020-04-03T07:29:19.373956",
"2020-04-03T07:29:19.373956", + "is_active": false }, { "hostname": "ceph-001", @@ -220,7 +227,8 @@ def test_spec_octopus(spec_json): "status_desc": "running", "last_refresh": "2020-04-03T15:31:48.726134", "created": "2020-04-02T20:35:17.142272", - "started": "2020-04-03T07:29:19.374002" + "started": "2020-04-03T07:29:19.374002", + "is_active": false }, { "hostname": "ceph-001", @@ -234,14 +242,16 @@ def test_spec_octopus(spec_json): "status_desc": "running", "last_refresh": "2020-04-03T15:31:48.726042", "created": "2020-04-02T19:24:10.281163", - "started": "2020-04-03T07:29:16.926292" + "started": "2020-04-03T07:29:16.926292", + "is_active": false }, { "hostname": "ceph-001", "daemon_id": "default-rgw-realm.eu-central-1.1.ceph-001.ytywjo", "daemon_type": "rgw", "status": 1, - "status_desc": "starting" + "status_desc": "starting", + "is_active": false } ]""") ) diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 5af3503fef480..55c512aee4499 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -1200,7 +1200,8 @@ class DaemonDescription(object): last_configured=None, osdspec_affinity=None, last_deployed=None, - events: Optional[List['OrchestratorEvent']] = None): + events: Optional[List['OrchestratorEvent']] = None, + is_active: bool=False): # Host is at the same granularity as InventoryHost self.hostname: str = hostname @@ -1242,6 +1243,8 @@ class DaemonDescription(object): self.osdspec_affinity: Optional[str] = osdspec_affinity self.events: List[OrchestratorEvent] = events or [] + + self.is_active = is_active def name(self): return '%s.%s' % (self.daemon_type, self.daemon_id) @@ -1318,6 +1321,7 @@ class DaemonDescription(object): out['status_desc'] = self.status_desc if self.daemon_type == 'osd': out['osdspec_affinity'] = self.osdspec_affinity + out['is_active'] = self.is_active for k in ['last_refresh', 'created', 'started', 'last_deployed', 'last_configured']: diff --git a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py index cc1e6e401fc28..b5b45a2caa36c 100644 --- a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py +++ b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py @@ -251,6 +251,7 @@ daemon_id: ubuntu hostname: ubuntu status: 1 status_desc: starting +is_active: false events: - 2020-06-10T10:08:22.933241 daemon:crash.ubuntu [INFO] "Deployed crash.ubuntu on host 'ubuntu'" -- 2.39.5