From ea987a0e56db106f7c76d11f86b3e602257f365e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 5 Mar 2020 10:42:26 -0600 Subject: [PATCH] mgr/cephadm: make osd create on an existing LV idempotent If we try to prepare an LV that was already prepared, ceph-volume will return an error message and code. We want our osd create command to be idempotent, though, so recognize the error string and continue. This is an ugly hack, but quicker than changing ceph-volume behavior, and it is sufficient to stop all of the teuthology failures. The second part of this is that we have to deploy the daemon on OSDs that are already prepared and already exist in our osdmap beforehand, but have never started. Works-around: https://tracker.ceph.com/issues/44313 Signed-off-by: Sage Weil --- src/pybind/mgr/cephadm/module.py | 32 ++++++++++++++++---- src/pybind/mgr/cephadm/tests/test_cephadm.py | 2 +- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 40c6ea38c3ab..534fc4e33aaf 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1879,12 +1879,16 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): return blink(locs) - def get_osd_uuid_map(self): + def get_osd_uuid_map(self, only_up=False): # type: () -> Dict[str,str] osd_map = self.get('osd_map') r = {} for o in osd_map['osds']: - r[str(o['osd'])] = o['uuid'] + # only include OSDs that have ever started in this map. this way + # an interrupted osd create can be repeated and succeed the second + # time around. + if not only_up or o['up_from'] > 0: + r[str(o['osd'])] = o['uuid'] return r def call_inventory(self, hosts, drive_groups): @@ -1956,7 +1960,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): 'keyring': keyring, }) - before_osd_uuid_map = self.get_osd_uuid_map() + before_osd_uuid_map = self.get_osd_uuid_map(only_up=True) split_cmd = cmd.split(' ') _cmd = ['--config-and-keyring', '-', '--'] @@ -1964,7 +1968,18 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): out, err, code = self._run_cephadm( host, 'osd', 'ceph-volume', _cmd, - stdin=j) + stdin=j, + error_ok=True) + if code == 1 and ', it is already prepared' in '\n'.join(err): + # HACK: when we create against an existing LV, ceph-volume + # returns an error and the above message. To make this + # command idempotent, tolerate this "error" and continue. + self.log.debug('the device was already prepared; continuing') + code = 0 + if code: + raise RuntimeError( + 'cephadm exited with an error code: %d, stderr:%s' % ( + code, '\n'.join(err))) # check result out, err, code = self._run_cephadm( @@ -1977,6 +1992,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): osds_elems = json.loads('\n'.join(out)) fsid = self._cluster_fsid osd_uuid_map = self.get_osd_uuid_map() + created = [] for osd_id, osds in osds_elems.items(): for osd in osds: if osd['tags']['ceph.cluster_fsid'] != fsid: @@ -1995,12 +2011,16 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): osd['tags']['ceph.osd_fsid'])) continue + created.append(osd_id) self._create_daemon( 'osd', osd_id, host, osd_uuid_map=osd_uuid_map) - self.cache.invalidate_host_devices(host) - return "Created osd(s) on host '{}'".format(host) + if created: + self.cache.invalidate_host_devices(host) + return "Created osd(s) %s on host '%s'" % (','.join(created), host) + else: + return "Created no osd(s) on host %s; already created?" % host def _calc_daemon_deps(self, daemon_type, daemon_id): need = { diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index d4a9a669eac7..1d912d81b922 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -120,7 +120,7 @@ class TestCephadm(object): with self._with_host(cephadm_module, 'test'): dg = DriveGroupSpec('test', data_devices=DeviceSelection(paths=[''])) c = cephadm_module.create_osds([dg]) - assert wait(cephadm_module, c) == ["Created osd(s) on host 'test'"] + assert wait(cephadm_module, c) == ["Created no osd(s) on host test; already created?"] @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm( json.dumps([ -- 2.47.3