"""
config = config if isinstance(config, dict) else dict()
cluster_name = config.get('cluster', 'ceph')
+ expected_checks = config.get('expected_checks', [])
log.info('Waiting until %s daemons up and pgs clean...', cluster_name)
manager = ctx.managers[cluster_name]
try:
if config.get('wait-for-healthy', True):
log.info('Waiting until ceph cluster %s is healthy...', cluster_name)
- manager.wait_until_healthy(timeout=300)
+ manager.wait_until_healthy(timeout=300, expected_checks=expected_checks)
if ctx.cluster.only(teuthology.is_type('mds', cluster_name)).remotes:
# Some MDSs exist, wait for them to be healthy
self.log('health:\n{h}'.format(h=out))
return json.loads(out)
def wait_until_healthy(self, timeout=None, expected_checks=None):
    """
    Poll the monitor health report until the cluster counts as healthy.

    The wait ends successfully when the reported status is ``HEALTH_OK``
    or when every reported health check is either muted or listed in
    ``expected_checks``. The report is polled every 3 seconds.

    :param timeout: seconds to keep polling before giving up; ``None``
                    polls without a time limit.
    :param expected_checks: iterable of health check names that are
                            tolerated while waiting and that must all be
                            present in the final health report; ``None``
                            (the default) means no checks are expected.
    :raises RuntimeError: if ``timeout`` expires while unexpected checks
                          are still reported, or if the expected checks
                          seen in the final report differ from
                          ``expected_checks``.
    """
    self.log("wait_until_healthy")
    # None means "nothing expected"; a set gives cheap membership tests
    # and is what the final comparison needs anyway.
    expected = set(expected_checks or ())
    start = time.time()
    while True:
        health = self.get_mon_health()
        # Rebuilt on every poll so the comparison after the loop reflects
        # the final health report rather than a stale, earlier one.
        found = set()
        unhealthy = []
        if health['status'] == 'HEALTH_OK':
            break
        for name, check in health['checks'].items():
            # Entries without a 'muted' field are treated as not muted.
            if check.get('muted', False):
                # logging interpolates with %-style placeholders.
                log.debug("%s is muted", name)
            elif name in expected:
                log.info("%s in expected_checks", name)
                found.add(name)
            else:
                unhealthy.append(name)
        if not unhealthy:
            # Only muted and/or expected checks remain.
            break
        if timeout is not None and timeout < (time.time() - start):
            what = ", ".join(unhealthy)
            err = f"timeout {timeout}s expired waiting for healthy cluster with these unhealthy checks: {what}"
            raise RuntimeError(err)
        time.sleep(3)
    if found != expected:
        # Keep the caller's ordering for the expected list; sort the set
        # so the message is deterministic.
        exp = ", ".join(expected_checks or ())
        fnd = ", ".join(sorted(found))
        err = f"healthy cluster but expected_checks ({exp}) not equal to {fnd}"
        raise RuntimeError(err)
    self.log("wait_until_healthy done")
def get_filepath(self):