From: John Fulton Date: Wed, 17 Mar 2021 22:03:46 +0000 (-0400) Subject: mgr/cephadm: retry after JSONDecodeError in wait_for_mgr_restart() X-Git-Tag: v16.2.0~24^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=98d55648b0d20f03f55610520b383d6f5fc70462;p=ceph.git mgr/cephadm: retry after JSONDecodeError in wait_for_mgr_restart() 'ceph mgr dump' does not always return valid JSON so cephadm will throw an exception sometimes when applying a spec as per the issue this PR closes. Add a try/except to catch a possible JSONDecodeError and retry after sleeping. Fixes: https://tracker.ceph.com/issues/49870 Signed-off-by: John Fulton (cherry picked from commit 0aba5704d9eb1a2df6dd437785fc1f8c558c0990) --- diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 7663c2a915501..c6e979acf37cf 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -3939,16 +3939,24 @@ def command_bootstrap(ctx): # create mgr create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli) + def json_loads_retry(cli_func): + for sleep_secs in [1, 4, 4]: + try: + return json.loads(cli_func()) + except json.JSONDecodeError: + logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs) + time.sleep(sleep_secs) + return json.loads(cli_func()) + # wait for mgr to restart (after enabling a module) def wait_for_mgr_restart(): # first get latest mgrmap epoch from the mon. try newer 'mgr # stat' command first, then fall back to 'mgr dump' if # necessary try: - out = cli(['mgr', 'stat']) + j = json_loads_retry(lambda: cli(['mgr', 'stat'])) except Exception: - out = cli(['mgr', 'dump']) - j = json.loads(out) + j = json_loads_retry(lambda: cli(['mgr', 'dump'])) epoch = j['epoch'] # wait for mgr to have it