From 84cd4ed6c3ed4e5359e5b66c302da6d76f9a4d67 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 25 Feb 2012 21:05:00 -0800 Subject: [PATCH] peer: wait for peering to complete, or block We need to wait for peering to either complete, or block because it is waiting for another PG. _Then_ look at all the PG states and compare the mon values with what we get from querying the OSDs directly. --- teuthology/task/ceph_manager.py | 27 +++++++++++++++++++++++++++ teuthology/task/peer.py | 5 +++++ 2 files changed, 32 insertions(+) diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py index cf4065d1c0ffa..f4d85a681da1c 100644 --- a/teuthology/task/ceph_manager.py +++ b/teuthology/task/ceph_manager.py @@ -267,12 +267,24 @@ class CephManager: num += 1 return num + def get_num_active_down(self): + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if (pg['state'].count('active') and not pg['state'].count('stale')) or \ + (pg['state'].count('down') and not pg['state'].count('stale')): + num += 1 + return num + def is_clean(self): return self.get_num_active_clean() == self.get_num_pgs() def is_recovered(self): return self.get_num_active_recovered() == self.get_num_pgs() + def is_active_or_down(self): + return self.get_num_active_down() == self.get_num_pgs() + def wait_for_clean(self, timeout=None): self.log("waiting for clean") start = time.time() @@ -303,6 +315,21 @@ class CephManager: time.sleep(3) self.log("recovered!") + def wait_for_active_or_down(self, timeout=None): + self.log("waiting for peering to complete or become blocked") + start = time.time() + num_active_down = self.get_num_active_down() + while not self.is_active_or_down(): + if timeout is not None: + assert time.time() - start < timeout, \ + 'failed to recover before timeout expired' + cur_active_down = self.get_num_active_down() + if cur_active_down != num_active_down: + start = time.time() + num_active_down = cur_active_down + time.sleep(3) + self.log("active or down!") + def 
osd_is_up(self, osd): osds = self.get_osd_dump() return osds[osd]['up'] > 0 diff --git a/teuthology/task/peer.py b/teuthology/task/peer.py index 9b820bd668847..096c285a01db5 100644 --- a/teuthology/task/peer.py +++ b/teuthology/task/peer.py @@ -68,6 +68,11 @@ def task(ctx, config): manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + manager.wait_for_active_or_down() + + manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') + manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') + # look for down pgs num_down_pgs = 0 pgs = manager.get_pg_stats() -- 2.39.5