From 63068800ecd446316ab903cf02da2a243f361b58 Mon Sep 17 00:00:00 2001 From: Adam King Date: Thu, 22 Sep 2022 14:17:23 -0400 Subject: [PATCH] mgr/cephadm: don't say migration in progress if migration current > migration last We risk ending up in an endless busy loop accomplishing nothing otherwise. Fixes: https://tracker.ceph.com/issues/57651 Signed-off-by: Adam King --- src/pybind/mgr/cephadm/migrations.py | 26 +++++++++++++++-- .../mgr/cephadm/tests/test_migration.py | 29 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py index 69f39cb9107..a3a35a900e9 100644 --- a/src/pybind/mgr/cephadm/migrations.py +++ b/src/pybind/mgr/cephadm/migrations.py @@ -31,8 +31,7 @@ class Migrations: # We have the cache, the inventory, the config store, the upgrade (imagine changing the # upgrade code, while an old upgrade is still in progress), naming of daemons, # fs-layout of the daemons, etc. - if self.mgr.migration_current is None: - self.set(LAST_MIGRATION) + self.set_sane_migration_current() v = mgr.get_store('nfs_migration_queue') self.nfs_migration_queue = json.loads(v) if v else [] @@ -46,8 +45,29 @@ class Migrations: self.mgr.set_module_option('migration_current', val) self.mgr.migration_current = val + def set_sane_migration_current(self) -> None: + # migration current should always be an integer + # between 0 and LAST_MIGRATION (inclusive) in order to + # actually carry out migration. If we find + # it is None or too high of a value here we should + # set it to some sane value + mc: Optional[int] = self.mgr.migration_current + if mc is None: + logger.info('Found migration_current of "None". Setting to last migration.') + self.set(LAST_MIGRATION) + return + + if mc > LAST_MIGRATION: + logger.error(f'Found migration_current of {mc} when max should be {LAST_MIGRATION}. Setting back to 0.') + # something has gone wrong and caused migration_current + # to be higher than it should be able to be. Best option + # we have here is to just set it back to 0 + self.set(0) + def is_migration_ongoing(self) -> bool: - return self.mgr.migration_current != LAST_MIGRATION + self.set_sane_migration_current() + mc: Optional[int] = self.mgr.migration_current + return mc is None or mc < LAST_MIGRATION def verify_no_migration(self) -> None: if self.is_migration_ongoing(): diff --git a/src/pybind/mgr/cephadm/tests/test_migration.py b/src/pybind/mgr/cephadm/tests/test_migration.py index 1c73897cb85..93481553ab5 100644 --- a/src/pybind/mgr/cephadm/tests/test_migration.py +++ b/src/pybind/mgr/cephadm/tests/test_migration.py @@ -228,3 +228,32 @@ def test_migrate_admin_client_keyring(cephadm_module: CephadmOrchestrator): assert cephadm_module.migration_current == LAST_MIGRATION assert cephadm_module.keys.keys['client.admin'].placement.label == '_admin' + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_set_sane_value(cephadm_module: CephadmOrchestrator): + cephadm_module.migration_current = 0 + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == 0 + + cephadm_module.migration_current = LAST_MIGRATION + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == LAST_MIGRATION + + cephadm_module.migration_current = None + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == LAST_MIGRATION + + cephadm_module.migration_current = LAST_MIGRATION + 1 + cephadm_module.migration.set_sane_migration_current() + assert cephadm_module.migration_current == 0 + + cephadm_module.migration_current = None + ongoing = cephadm_module.migration.is_migration_ongoing() + assert not ongoing + assert cephadm_module.migration_current == LAST_MIGRATION + + cephadm_module.migration_current = LAST_MIGRATION + 1 + ongoing = cephadm_module.migration.is_migration_ongoing() + assert ongoing + assert cephadm_module.migration_current == 0 -- 2.39.5