From 1c0a0a6f6153dc555085f263f161666cd171964a Mon Sep 17 00:00:00 2001 From: Joshua Schmid Date: Thu, 30 Apr 2020 11:47:22 +0200 Subject: [PATCH] mgr/cephadm: add support for proper osd daemon tracking Signed-off-by: Joshua Schmid --- src/pybind/mgr/cephadm/module.py | 44 ++++++++++++++++--- src/pybind/mgr/cephadm/tests/test_cephadm.py | 6 +-- src/pybind/mgr/orchestrator/_interface.py | 4 ++ .../ceph/deployment/translate.py | 2 + 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 89af9d14b04..4bf248dfc02 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1746,6 +1746,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): sd.container_image_name = d.get('container_image_name') sd.container_image_id = d.get('container_image_id') sd.version = d.get('version') + if sd.daemon_type == 'osd': + sd.osdspec_affinity = self.get_osdspec_affinity(sd.daemon_id) if 'state' in d: sd.status_desc = d['state'] sd.status = { @@ -1815,6 +1817,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): self._refresh_host_daemons(host) # sm = {} # type: Dict[str, orchestrator.ServiceDescription] + osd_count = 0 for h, dm in self.cache.get_daemons_with_volatile_status(): for name, dd in dm.items(): if service_type and service_type != dd.daemon_type: @@ -1823,9 +1826,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): if service_name and service_name != n: continue if dd.daemon_type == 'osd': - continue # ignore OSDs for now - if dd.service_name() in self.spec_store.specs: - spec = self.spec_store.specs[dd.service_name()] + """ + OSDs do not know the affinity to their spec out of the box. + """ + n = f"osd.{dd.osdspec_affinity}" + if n in self.spec_store.specs: + spec = self.spec_store.specs[n] else: spec = ServiceSpec( unmanaged=True, @@ -1842,9 +1848,19 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): container_image_name=dd.container_image_name, spec=spec, ) - if dd.service_name() in self.spec_store.specs: - sm[n].size = self._get_spec_size(spec) - sm[n].created = self.spec_store.spec_created[dd.service_name()] + if n in self.spec_store.specs: + if dd.daemon_type == 'osd': + """ + The osd count can't be determined by the Placement spec. + It's rather pointless to show a actual/expected representation + here. So we're setting running = size for now. + """ + osd_count += 1 + sm[n].size = osd_count + else: + sm[n].size = self._get_spec_size(spec) + + sm[n].created = self.spec_store.spec_created[n] if service_type == 'nfs': spec = cast(NFSServiceSpec, spec) sm[n].rados_config_location = spec.rados_config_location() @@ -2056,6 +2072,22 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): def apply_drivegroups(self, specs: List[DriveGroupSpec]): return [self._apply(spec) for spec in specs] + def get_osdspec_affinity(self, osd_id: str) -> str: + ret, out, err = self.mon_command({ + 'prefix': 'osd metadata', + 'id': int(osd_id), + 'format': 'json' + }) + if ret != 0: + self.log.warning(f"Caught error on calling 'osd metadata {osd_id}' -> {err}") + return '' + try: + metadata = json.loads(out) + except json.decoder.JSONDecodeError: + self.log.error(f"Could not decode json -> {out}") + return '' + return metadata.get('osdspec_affinity', '') + def find_destroyed_osds(self) -> Dict[str, List[str]]: osd_host_map: Dict[str, List[str]] = dict() ret, out, err = self.mon_command({ diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index a8bd0ae0f93..2d9ba86ee1d 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -297,17 +297,17 @@ class TestCephadm(object): # no preview and only one disk, prepare is used due the hack that is in place. (['/dev/sda'], False, "lvm prepare --bluestore --data /dev/sda --no-systemd"), # no preview and multiple disks, uses batch - (['/dev/sda', '/dev/sdb'], False, "lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"), + (['/dev/sda', '/dev/sdb'], False, "CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"), # preview and only one disk needs to use batch again to generate the preview (['/dev/sda'], True, "lvm batch --no-auto /dev/sda --report --format json"), # preview and multiple disks work the same - (['/dev/sda', '/dev/sdb'], True, "lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"), + (['/dev/sda', '/dev/sdb'], True, "CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"), ] ) @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}')) def test_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_command): with self._with_host(cephadm_module, 'test'): - dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), data_devices=DeviceSelection(paths=devices)) + dg = DriveGroupSpec(service_id='test.spec', placement=PlacementSpec(host_pattern='test'), data_devices=DeviceSelection(paths=devices)) ds = DriveSelection(dg, Devices([Device(path) for path in devices])) preview = preview out = cephadm_module.driveselection_to_ceph_volume(dg, ds, [], preview) diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index d6d94930de6..8e8e806761e 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -1245,6 +1245,7 @@ class DaemonDescription(object): created=None, started=None, last_configured=None, + osdspec_affinity=None, last_deployed=None): # Host is at the same granularity as InventoryHost self.hostname = hostname @@ -1282,6 +1283,9 @@ class DaemonDescription(object): self.last_configured = last_configured # type: Optional[datetime.datetime] self.last_deployed = last_deployed # type: Optional[datetime.datetime] + # Affinity to a certain OSDSpec + self.osdspec_affinity = osdspec_affinity # type: Optional[str] + def name(self): return '%s.%s' % (self.daemon_type, self.daemon_id) diff --git a/src/python-common/ceph/deployment/translate.py b/src/python-common/ceph/deployment/translate.py index 2412e861867..1431836b2ae 100644 --- a/src/python-common/ceph/deployment/translate.py +++ b/src/python-common/ceph/deployment/translate.py @@ -101,4 +101,6 @@ class to_ceph_volume(object): cmd += " --report" cmd += " --format json" + cmd = f"CEPH_VOLUME_OSDSPEC_AFFINITY={self.spec.service_id} " + cmd + return cmd -- 2.39.5