From 57f8e318d33f3eca74b87d8231ec71d208fdf55a Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Thu, 10 Jun 2021 20:19:09 +0800 Subject: [PATCH] tasks/ceph_manager: ignore EACCES when waiting for quorum mon_tick_interval is 5 seconds by default. monitors update their rotating keys every mon_tick_interval. before the monitors form a quorum, the auth requests from clients are put into the wait list. these requests are re-enqueued once the monitors form a quorum. but there is a small window of mon_tick_interval, before they are able to serve the auth requests even after they claim to be able to serve requests. if these re-enqueued requests happen to be served in this window, and if authx is enabled, they will be greeted with errors like handle_auth_bad_method server allowed_methods [2] but i only support [2] in the case of ceph cli, the error would look like: [errno 13] RADOS permission denied (error connecting to the cluster) so, to address this issue, the EACCES error is ignored when waiting for a quorum. 
Signed-off-by: Kefu Chai (cherry picked from commit 7afd38f846894f11a61f697a2522cd0c30a35dc7) --- qa/tasks/ceph_manager.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index b212278ff8499..1117205fd941c 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -3,6 +3,7 @@ ceph manager -- Thrasher and CephManager objects """ from functools import wraps import contextlib +import errno import random import signal import time @@ -2565,8 +2566,17 @@ class CephManager: tries=timeout // sleep, action=f'wait for quorum size {size}') as proceed: while proceed(): - if len(self.get_mon_quorum()) == size: - break + try: + if len(self.get_mon_quorum()) == size: + break + except CommandFailedError as e: + # could fail instead of blocked if the rotating key of the + # connected monitor is not updated yet after they form the + # quorum + if e.exitstatus == errno.EACCES: + pass + else: + raise self.log("quorum is size %d" % size) def get_mon_health(self, debug=False): -- 2.39.5