# NOTE: only the two methods introduced by this change are shown here; the
# remaining members of Thrasher and CephManager live in
# teuthology/task/ceph_manager.py.
class Thrasher:
    def test_backfill_full(self, max_polls=30, poll_interval=1):
        """
        Test backfills stopping when the replica fills up.

        First, use osd_backfill_full_ratio to simulate a now full
        osd by setting it to 0 on all of the OSDs.

        Second, on a random subset, set
        osd_debug_skip_full_check_in_backfill_reservation to force
        the more complicated check in do_scan to be exercised.

        Then, verify that all backfills stop.

        The injected settings are put back on every live OSD whether or
        not the verification succeeds, so a failed check does not leave
        the OSDs configured as permanently full.

        :param max_polls: maximum number of pg status polls made while
                          waiting for backfills to stop (default 30)
        :param poll_interval: seconds to sleep between polls (default 1)
        :raises AssertionError: if a pg still reports the 'backfill'
                                state after the polling loop
        """
        self.log("injecting osd_backfill_full_ratio = 0")
        try:
            for osd in self.live_osds:
                self.ceph_manager.set_config(
                    osd,
                    osd_debug_skip_full_check_in_backfill_reservation=(
                        random.choice(['false', 'true'])),
                    osd_backfill_full_ratio=0)
            for _ in range(max_polls):
                status = self.ceph_manager.compile_pg_status()
                if 'backfill' not in status:
                    break
                self.log(
                    "waiting for {still_going} backfills".format(
                        still_going=status.get('backfill')))
                time.sleep(poll_interval)
            # Re-read the status once more after the loop: this is the
            # authoritative check, both when the loop timed out and when
            # it exited early.
            status = self.ceph_manager.compile_pg_status()
            assert 'backfill' not in status, (
                "backfills did not stop; pg states: {states}".format(
                    states=status))
        finally:
            # NOTE(review): this restores a fixed 0.85 / 'false' rather than
            # whatever values were in effect before the test -- confirm that
            # matches the cluster's configured defaults.
            for osd in self.live_osds:
                self.ceph_manager.set_config(
                    osd,
                    osd_debug_skip_full_check_in_backfill_reservation='false',
                    osd_backfill_full_ratio=0.85)


class CephManager:
    def compile_pg_status(self):
        """
        Tally how many pgs report each state.

        The 'state' string of every entry returned by get_pg_stats() is
        split on '+', and each resulting token is counted once per pg.

        :returns: dict mapping state token to the number of pgs
                  reporting it (empty when there are no pgs)
        """
        ret = {}
        for pg in self.get_pg_stats():
            for status in pg['state'].split('+'):
                ret[status] = ret.get(status, 0) + 1
        return ret
a/teuthology/task/thrashosds.py +++ b/teuthology/task/thrashosds.py @@ -72,6 +72,9 @@ def task(ctx, config): of just the osd process. Note that this assumes that a single osd is the only important process on the node. + chance_test_backfill_full: (0) chance to simulate full disks stopping + backfill + example: tasks: -- 2.39.5