]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: don't say migration in progress if migration current > migration last
authorAdam King <adking@redhat.com>
Thu, 22 Sep 2022 18:17:23 +0000 (14:17 -0400)
committerAdam King <adking@redhat.com>
Tue, 27 Sep 2022 22:23:18 +0000 (18:23 -0400)
We risk ending up in an endless busy loop accomplishing
nothing otherwise.

Fixes: https://tracker.ceph.com/issues/57651
Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/migrations.py
src/pybind/mgr/cephadm/tests/test_migration.py

index 69f39cb9107703eb286b7ce0ee7085ace3bde420..a3a35a900e9069fc3d7701db79770399bcac6e30 100644 (file)
@@ -31,8 +31,7 @@ class Migrations:
         # We have the cache, the inventory, the config store, the upgrade (imagine changing the
         # upgrade code, while an old upgrade is still in progress), naming of daemons,
         # fs-layout of the daemons, etc.
-        if self.mgr.migration_current is None:
-            self.set(LAST_MIGRATION)
+        self.set_sane_migration_current()
 
         v = mgr.get_store('nfs_migration_queue')
         self.nfs_migration_queue = json.loads(v) if v else []
@@ -46,8 +45,29 @@ class Migrations:
         self.mgr.set_module_option('migration_current', val)
         self.mgr.migration_current = val
 
+    def set_sane_migration_current(self) -> None:
+        # migration current should always be an integer
+        # between 0 and LAST_MIGRATION (inclusive) in order to
+        # actually carry out migration. If we find
+        # it is None or too high of a value here we should
+        # set it to some sane value
+        mc: Optional[int] = self.mgr.migration_current
+        if mc is None:
+            logger.info('Found migration_current of "None". Setting to last migration.')
+            self.set(LAST_MIGRATION)
+            return
+
+        if mc > LAST_MIGRATION:
+            logger.error(f'Found migration_current of {mc} when max should be {LAST_MIGRATION}. Setting back to 0.')
+            # something has gone wrong and caused migration_current
+            # to be higher than it should be able to be. Best option
+            # we have here is to just set it back to 0
+            self.set(0)
+
     def is_migration_ongoing(self) -> bool:
-        return self.mgr.migration_current != LAST_MIGRATION
+        self.set_sane_migration_current()
+        mc: Optional[int] = self.mgr.migration_current
+        return mc is None or mc < LAST_MIGRATION
 
     def verify_no_migration(self) -> None:
         if self.is_migration_ongoing():
index 1c73897cb852734e3080ac31dcd1540b99c98d73..93481553ab5fdb1a82a3034a12ed8e3e911086ca 100644 (file)
@@ -228,3 +228,32 @@ def test_migrate_admin_client_keyring(cephadm_module: CephadmOrchestrator):
     assert cephadm_module.migration_current == LAST_MIGRATION
 
     assert cephadm_module.keys.keys['client.admin'].placement.label == '_admin'
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_set_sane_value(cephadm_module: CephadmOrchestrator):
+    cephadm_module.migration_current = 0
+    cephadm_module.migration.set_sane_migration_current()
+    assert cephadm_module.migration_current == 0
+
+    cephadm_module.migration_current = LAST_MIGRATION
+    cephadm_module.migration.set_sane_migration_current()
+    assert cephadm_module.migration_current == LAST_MIGRATION
+
+    cephadm_module.migration_current = None
+    cephadm_module.migration.set_sane_migration_current()
+    assert cephadm_module.migration_current == LAST_MIGRATION
+
+    cephadm_module.migration_current = LAST_MIGRATION + 1
+    cephadm_module.migration.set_sane_migration_current()
+    assert cephadm_module.migration_current == 0
+
+    cephadm_module.migration_current = None
+    ongoing = cephadm_module.migration.is_migration_ongoing()
+    assert not ongoing
+    assert cephadm_module.migration_current == LAST_MIGRATION
+
+    cephadm_module.migration_current = LAST_MIGRATION + 1
+    ongoing = cephadm_module.migration.is_migration_ongoing()
+    assert ongoing
+    assert cephadm_module.migration_current == 0