From: Sage Weil Date: Thu, 25 Nov 2021 14:10:28 +0000 (-0600) Subject: qa/tasks/cephadm_cases/test_cli: fix test_daemon_restart X-Git-Tag: v17.1.0~353^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9ae989482719bf890232061cf90b8677a788c25c;p=ceph-ci.git qa/tasks/cephadm_cases/test_cli: fix test_daemon_restart We cannot schedule a daemon start if there is another daemon action with a higher priority (including stop) scheduled. However, that state isn't cleared until *after* the osd goes down, the systemctl command returns, and mgr/cephadm gets around to updating the inventory scheduled_daemon_action state. Semi-fix: (1) wait for the orch status to change, and then (2) wait a few more seconds after that. Signed-off-by: Sage Weil --- diff --git a/qa/tasks/cephadm_cases/test_cli.py b/qa/tasks/cephadm_cases/test_cli.py index 1dcf2f35e69..c05395673c8 100644 --- a/qa/tasks/cephadm_cases/test_cli.py +++ b/qa/tasks/cephadm_cases/test_cli.py @@ -1,6 +1,9 @@ +import json import logging +import time from tasks.mgr.mgr_test_case import MgrTestCase +from teuthology.contextutil import safe_while log = logging.getLogger(__name__) @@ -48,6 +51,13 @@ class TestCephadmCLI(MgrTestCase): def test_daemon_restart(self): self._orch_cmd('daemon', 'stop', 'osd.0') self.wait_for_health('OSD_DOWN', 30) + with safe_while(sleep=1, tries=30) as proceed: + while proceed(): + j = json.loads(self._orch_cmd('ps', '--format', 'json')) + d = {d['daemon_name']: d for d in j} + if d['osd.0']['status_desc'] != 'running': + break + time.sleep(5) self._orch_cmd('daemon', 'start', 'osd.0') self.wait_for_health_clear(90) self._orch_cmd('daemon', 'restart', 'osd.0')