git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: add replacement flag to _create_osd 34346/head
author    Joshua Schmid <jschmid@suse.de>
          Thu, 2 Apr 2020 09:33:35 +0000 (11:33 +0200)
committer Joshua Schmid <jschmid@suse.de>
          Wed, 15 Apr 2020 13:56:45 +0000 (15:56 +0200)
Signed-off-by: Joshua Schmid <jschmid@suse.de>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/python-common/ceph/deployment/translate.py
src/python-common/ceph/tests/test_drive_group.py

src/pybind/mgr/cephadm/module.py
index a2e5d1c9b9593a7cf2fdf38dd11b30efc3ef42ec..2adc81e6b4d56a5ef635cbfa297808de53ec86d2 100644 (file)
@@ -2084,15 +2084,18 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
         return blink(locs)
 
     def get_osd_uuid_map(self, only_up=False):
-        # type: (bool) -> Dict[str,str]
+        # type: (bool) -> Dict[str, str]
         osd_map = self.get('osd_map')
         r = {}
         for o in osd_map['osds']:
             # only include OSDs that have ever started in this map.  this way
             # an interrupted osd create can be repeated and succeed the second
             # time around.
-            if not only_up or o['up_from'] > 0:
-                r[str(o['osd'])] = o['uuid']
+            osd_id = o.get('osd')
+            if osd_id is None:
+                raise OrchestratorError("Could not retrieve osd_id from osd_map")
+            if not only_up or (o['up_from'] > 0):
+                r[str(osd_id)] = o.get('uuid', '')
         return r
 
     @trivial_completion
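For reference, the hardened get_osd_uuid_map() refuses to continue when an osd_map entry has no 'osd' id and tolerates a missing 'uuid'. A minimal standalone sketch of the mapping it builds; the uuid strings and up_from values below are illustrative, not taken from a live cluster:

    # illustrative subset of the 'osds' array in the mgr's osd_map
    osds = [
        {'osd': 0, 'uuid': '1111-aaaa', 'up_from': 5},   # has started at least once
        {'osd': 1, 'uuid': '2222-bbbb', 'up_from': 0},   # created but never started
    ]
    full_map = {str(o['osd']): o.get('uuid', '') for o in osds}                      # only_up=False
    up_map   = {str(o['osd']): o.get('uuid', '') for o in osds if o['up_from'] > 0}  # only_up=True
    # full_map == {'0': '1111-aaaa', '1': '2222-bbbb'};  up_map == {'0': '1111-aaaa'}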
@@ -2126,13 +2129,17 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
     def create_osds(self, drive_group: DriveGroupSpec):
         self.log.debug(f"Processing DriveGroup {drive_group}")
         ret = []
+        drive_group.osd_id_claims = self.find_destroyed_osds()
+        self.log.info(f"Found osd claims for drivegroup {drive_group.service_id} -> {drive_group.osd_id_claims}")
         for host, drive_selection in self.prepare_drivegroup(drive_group):
             self.log.info('Applying %s on host %s...' % (drive_group.service_id, host))
-            cmd = self.driveselection_to_ceph_volume(drive_group, drive_selection)
+            cmd = self.driveselection_to_ceph_volume(drive_group, drive_selection,
+                                                     drive_group.osd_id_claims.get(host, []))
             if not cmd:
                 self.log.debug("No data_devices, skipping DriveGroup: {}".format(drive_group.service_id))
                 continue
-            ret_msg = self._create_osd(host, cmd)
+            ret_msg = self._create_osd(host, cmd,
+                                       replace_osd_ids=drive_group.osd_id_claims.get(host, []))
             ret.append(ret_msg)
         return ", ".join(ret)
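Because the claims are now resolved inside create_osds() itself, each host is only handed the destroyed OSD ids that were found for it. A standalone sketch of the per-host lookup, assuming find_destroyed_osds() returns a hostname-to-id-list mapping as its use above suggests:

    # assumed return shape of find_destroyed_osds(): {hostname: [osd_id, ...]}
    osd_id_claims = {'host1': ['0', '2'], 'host2': ['5']}
    for host in ('host1', 'host2', 'host3'):
        claims = osd_id_claims.get(host, [])   # [] for hosts with nothing to replace
        # 'claims' is passed to driveselection_to_ceph_volume() and, as
        # replace_osd_ids, to _create_osd() so replaced ids are not skipped
        print(host, claims)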
 
@@ -2143,8 +2150,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
         host_ds_map = []
 
         # set osd_id_claims
-        drive_group.osd_id_claims = self.find_destroyed_osds()
-        self.log.info(f"Found osd claims for drivegroup {drive_group.service_id} -> {drive_group.osd_id_claims}")
 
         def _find_inv_for_host(hostname: str, inventory_dict: dict):
             # This is stupid and needs to be loaded with the host
@@ -2165,9 +2170,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
 
     def driveselection_to_ceph_volume(self, drive_group: DriveGroupSpec,
                                       drive_selection: DriveSelection,
+                                      osd_id_claims: Optional[List[str]] = None,
                                       preview: bool = False) -> Optional[str]:
         self.log.debug(f"Translating DriveGroup <{drive_group}> to ceph-volume command")
-        cmd: Optional[str] = translate.to_ceph_volume(drive_group, drive_selection, preview=preview).run()
+        cmd: Optional[str] = translate.to_ceph_volume(drive_group, drive_selection, osd_id_claims, preview=preview).run()
         self.log.debug(f"Resulting ceph-volume cmd: {cmd}")
         return cmd
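The new osd_id_claims keyword defaults to None, so existing callers keep working and an empty list leaves the generated command untouched. A standalone sketch of that falsy-value behaviour (to_cv_id_args is a hypothetical helper, not part of the module):

    from typing import List, Optional

    def to_cv_id_args(osd_id_claims: Optional[List[str]] = None) -> str:
        # mirrors the translate layer: None and [] both mean "nothing to reclaim"
        return " --osd-ids {}".format(" ".join(osd_id_claims)) if osd_id_claims else ""

    print(repr(to_cv_id_args()))            # ''
    print(repr(to_cv_id_args(['3', '7'])))  # ' --osd-ids 3 7'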
 
@@ -2183,9 +2189,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
             drive_groups = []
         ret_all = []
         for drive_group in drive_groups:
+            drive_group.osd_id_claims = self.find_destroyed_osds()
+            self.log.info(f"Found osd claims for drivegroup {drive_group.service_id} -> {drive_group.osd_id_claims}")
             # prepare driveselection
             for host, ds in self.prepare_drivegroup(drive_group):
-                cmd = self.driveselection_to_ceph_volume(drive_group, ds, preview=True)
+                cmd = self.driveselection_to_ceph_volume(drive_group, ds,
+                                                         drive_group.osd_id_claims.get(host, []), preview=True)
                 if not cmd:
                     self.log.debug("No data_devices, skipping DriveGroup: {}".format(drive_group.service_name()))
                     continue
@@ -2224,7 +2233,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
             error_ok=True)
         return out, err, code
 
-    def _create_osd(self, host, cmd):
+    def _create_osd(self, host, cmd, replace_osd_ids=None):
         out, err, code = self._run_ceph_volume_command(host, cmd)
 
         if code == 1 and ', it is already prepared' in '\n'.join(err):
@@ -2256,16 +2265,16 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                 if osd['tags']['ceph.cluster_fsid'] != fsid:
                     self.log.debug('mismatched fsid, skipping %s' % osd)
                     continue
-                if osd_id in before_osd_uuid_map:
-                    # this osd existed before we ran prepare
+                if osd_id in before_osd_uuid_map and osd_id not in replace_osd_ids:
+                    # this osd existed before we ran prepare and is not part of
+                    # the replacement operation, so skip it
                     continue
                 if osd_id not in osd_uuid_map:
-                    self.log.debug('osd id %d does not exist in cluster' % osd_id)
+                    self.log.debug('osd id {} does not exist in cluster'.format(osd_id))
                     continue
-                if osd_uuid_map[osd_id] != osd['tags']['ceph.osd_fsid']:
+                if osd_uuid_map.get(osd_id) != osd['tags']['ceph.osd_fsid']:
                     self.log.debug('mismatched osd uuid (cluster has %s, osd '
                                    'has %s)' % (
-                                       osd_uuid_map[osd_id],
+                                       osd_uuid_map.get(osd_id),
                                        osd['tags']['ceph.osd_fsid']))
                     continue
 
@@ -2360,7 +2369,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
             if daemon_type == 'osd':
                 if not osd_uuid_map:
                     osd_uuid_map = self.get_osd_uuid_map()
-                osd_uuid = osd_uuid_map.get(daemon_id, None)
+                osd_uuid = osd_uuid_map.get(daemon_id)
                 if not osd_uuid:
                     raise OrchestratorError('osd.%d not in osdmap' % daemon_id)
                 extra_args.extend(['--osd-fsid', osd_uuid])
src/pybind/mgr/cephadm/tests/test_cephadm.py
index 24146c765162c3f2f6c8aae4c722c6d393d380e8..21cf5afe4f5366f615732581560f6d475eb2a21e 100644 (file)
@@ -288,7 +288,7 @@ class TestCephadm(object):
             dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), data_devices=DeviceSelection(paths=devices))
             ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
             preview = preview
-            out = cephadm_module.driveselection_to_ceph_volume(dg, ds, preview)
+            out = cephadm_module.driveselection_to_ceph_volume(dg, ds, [], preview)
             assert out in exp_command
 
     @mock.patch("cephadm.module.SpecStore.find")
src/python-common/ceph/deployment/translate.py
index 6f0fd1107e540fd45d648d821dd3b508f3ee6a3e..2412e86186734a4d8b84f59edc815449e5f2ea49 100644 (file)
@@ -16,14 +16,14 @@ class to_ceph_volume(object):
     def __init__(self,
                  spec,  # type: DriveGroupSpec
                  selection,  # type: DriveSelection
-                 host,  # type: str
-                 preview=False
+                 osd_id_claims=None,  # type: Optional[List[str]]
+                 preview=False  # type: bool
                  ):
 
         self.spec = spec
         self.selection = selection
         self.preview = preview
-        self.host = host
+        self.osd_id_claims = osd_id_claims
 
     def run(self):
         # type: () -> Optional[str]
@@ -32,7 +32,6 @@ class to_ceph_volume(object):
         db_devices = [x.path for x in self.selection.db_devices()]
         wal_devices = [x.path for x in self.selection.wal_devices()]
         journal_devices = [x.path for x in self.selection.journal_devices()]
-        reclaimed_ids: List[str] = self.spec.osd_id_claims.get(self.host, [])
 
         if not data_devices:
             return None
@@ -60,6 +59,8 @@ class to_ceph_volume(object):
            not db_devices and \
            not wal_devices:
             cmd = "lvm prepare --bluestore --data %s --no-systemd" % (' '.join(data_devices))
+            if self.osd_id_claims:
+                cmd += " --osd-id {}".format(str(self.osd_id_claims[0]))
             if self.preview:
                 # Like every horrible hack, this has side effects on other features.
                 # In this case, 'lvm prepare' has neither a '--report' nor a '--format json' option
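The single-device path goes through 'lvm prepare', which accepts at most one id, hence only osd_id_claims[0] is reused. A standalone sketch of the string this branch builds; the device path and id are illustrative:

    data_devices  = ['/dev/sdx']   # illustrative
    osd_id_claims = ['3']          # only the first claimed id can be reused here
    cmd = "lvm prepare --bluestore --data %s --no-systemd" % (' '.join(data_devices))
    if osd_id_claims:
        cmd += " --osd-id {}".format(osd_id_claims[0])
    print(cmd)   # lvm prepare --bluestore --data /dev/sdx --no-systemd --osd-id 3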
@@ -90,8 +91,8 @@ class to_ceph_volume(object):
         if self.spec.osds_per_device:
             cmd += " --osds-per-device {}".format(self.spec.osds_per_device)
 
-        if reclaimed_ids:
-            cmd += " --osd-ids {}".format(" ".join(reclaimed_ids))
+        if self.osd_id_claims:
+            cmd += " --osd-ids {}".format(" ".join(self.osd_id_claims))
 
         cmd += " --yes"
         cmd += " --no-systemd"
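The batch path can reuse several ids at once via --osd-ids. A standalone sketch of the resulting command, matching the expectation added in test_ceph_volume_command_7 below; the device paths are illustrative:

    data_devices  = ['/dev/sda', '/dev/sdb']   # illustrative
    osd_id_claims = ['0', '1']
    cmd = "lvm batch --no-auto {}".format(' '.join(data_devices))
    if osd_id_claims:
        cmd += " --osd-ids {}".format(" ".join(osd_id_claims))
    cmd += " --yes"
    cmd += " --no-systemd"
    print(cmd)   # lvm batch --no-auto /dev/sda /dev/sdb --osd-ids 0 1 --yes --no-systemd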
src/python-common/ceph/tests/test_drive_group.py
index 7cfbf85da048de638f3f526cc501a5dea79986d2..c0734ced82e39919cd86a16bbbf6ba0fac75f353 100644 (file)
@@ -72,7 +72,7 @@ def test_ceph_volume_command_0():
                           )
     inventory = _mk_inventory(_mk_device()*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd'
 
 
@@ -83,7 +83,7 @@ def test_ceph_volume_command_1():
                           )
     inventory = _mk_inventory(_mk_device(rotational=True)*2 + _mk_device(rotational=False)*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb '
                    '--db-devices /dev/sdc /dev/sdd --yes --no-systemd')
 
@@ -99,7 +99,7 @@ def test_ceph_volume_command_2():
                               _mk_device(size="10.0 GB", rotational=False)*2
                               )
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb '
                    '--db-devices /dev/sdc /dev/sdd --wal-devices /dev/sde /dev/sdf '
                    '--yes --no-systemd')
@@ -117,7 +117,7 @@ def test_ceph_volume_command_3():
                               _mk_device(size="10.0 GB", rotational=False)*2
                               )
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb '
                    '--db-devices /dev/sdc /dev/sdd '
                    '--wal-devices /dev/sde /dev/sdf --dmcrypt '
@@ -139,7 +139,7 @@ def test_ceph_volume_command_4():
                               _mk_device(size="10.0 GB", rotational=False)*2
                               )
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb '
                    '--db-devices /dev/sdc /dev/sdd --wal-devices /dev/sde /dev/sdf '
                    '--block-wal-size 500M --block-db-size 500M --dmcrypt '
@@ -153,7 +153,7 @@ def test_ceph_volume_command_5():
                           )
     inventory = _mk_inventory(_mk_device(rotational=True)*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --filestore --yes --no-systemd'
 
 
@@ -166,7 +166,7 @@ def test_ceph_volume_command_6():
                           )
     inventory = _mk_inventory(_mk_device(rotational=True)*2 + _mk_device(rotational=False)*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sdc /dev/sdd '
                    '--journal-size 500M --journal-devices /dev/sda /dev/sdb '
                    '--filestore --yes --no-systemd')
@@ -179,5 +179,5 @@ def test_ceph_volume_command_7():
                           )
     inventory = _mk_inventory(_mk_device(rotational=True)*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, ['0', '1']).run()
     assert cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --osd-ids 0 1 --yes --no-systemd'