From: Kefu Chai Date: Thu, 10 Jun 2021 12:19:09 +0000 (+0800) Subject: tasks/ceph_manager: ignore EACCES when waiting for quorum X-Git-Tag: v16.2.6~131^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e535c8f099fef5f9393888dc5ce8643dcc9dac2b;p=ceph.git tasks/ceph_manager: ignore EACCES when waiting for quorum mon_tick_interval is 5 seconds by default. monitors update their rotating keys every mon_tick_interval. before monitors form a quorum, the auth requests from clients are put into the wait list. these requests are re-enqueued once the monitors form a quorum. but there is a small window of mon_tick_interval, before they are able to serve the auth requests even after they claim to be able to serve requests. if these re-enqueued requests happen to be served in this window, and if authx is enabled, they will be greeted with errors like handle_auth_bad_method server allowed_methods [2] but i only support [2] in the case of ceph cli, the error would look like: [errno 13] RADOS permission denied (error connecting to the cluster) so, to address this issue, the EACCES error is ignored when waiting for a quorum. 
Signed-off-by: Kefu Chai (cherry picked from commit 7afd38f846894f11a61f697a2522cd0c30a35dc7) --- diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index 8b901df3f976..28f28f54f492 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -3,6 +3,7 @@ ceph manager -- Thrasher and CephManager objects """ from functools import wraps import contextlib +import errno import random import signal import time @@ -3083,8 +3084,17 @@ class CephManager: tries=timeout // sleep, action=f'wait for quorum size {size}') as proceed: while proceed(): - if len(self.get_mon_quorum()) == size: - break + try: + if len(self.get_mon_quorum()) == size: + break + except CommandFailedError as e: + # could fail instead of blocking if the rotating key of the + # connected monitor is not updated yet after they form the + # quorum + if e.exitstatus == errno.EACCES: + pass + else: + raise self.log("quorum is size %d" % size) def get_mon_health(self, debug=False):