if overwrites:
self.run_ceph_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true')
def _get_unhealthy_mds_id(self, health_report, health_warn):
    '''
    Return MDS ID for which health warning in "health_warn" has been
    generated.

    "health_report" is the parsed JSON health report; the ID is taken from
    the first detail message recorded under
    health_report['checks'][health_warn].
    '''
    # variable "msg" should hold string something like this -
    # 'mds.b(mds.0): Behind on trimming (865/10) max_segments: 10,
    # num_segments: 86
    msg = health_report['checks'][health_warn]['detail'][0]['message']

    # Everything before the first "(" is the daemon name, e.g. "mds.b".
    daemon_name = msg.split('(', 1)[0]

    # Strip only the leading "mds." prefix. A blanket str.replace() would
    # also delete any later "mds." occurring inside the ID itself.
    prefix = 'mds.'
    if daemon_name.startswith(prefix):
        return daemon_name[len(prefix):]
    return daemon_name
def gen_health_warn_mds_cache_oversized(self):
    '''
    Provoke the health warning MDS_CACHE_OVERSIZED and wait until it is
    reported.
    '''
    health_warn = 'MDS_CACHE_OVERSIZED'

    # A tiny cache limit together with a threshold of 1.0 so that the
    # files opened below are enough to trip the warning.
    self.config_set('mds', 'mds_cache_memory_limit', '1K')
    self.config_set('mds', 'mds_health_cache_threshold', '1.00000')
    # presumably opens 400 files under "." in the background - TODO confirm
    self.mount_a.open_n_background('.', 400)

    # NOTE(review): 30 is assumed to be a timeout in seconds - confirm
    # against wait_for_health().
    self.wait_for_health(health_warn, 30)

def wait_till_health_warn(self, health_warn, active_mds_id, sleep=3,
                          tries=10):
    '''
    Poll "ceph health detail" until "health_warn" is listed among the
    health checks.

    Each attempt first runs "cache status" on MDS "active_mds_id" (its
    output is discarded) and then looks for "health_warn" in the JSON
    health report. Returns None as soon as the warning is present.
    "sleep" and "tries" are handed to safe_while(), which is expected to
    give up with "errmsg" once the attempts are exhausted.
    '''
    errmsg = (f'Expected health warning "{health_warn}" to eventually '
              'show up in output of command "ceph health detail". Tried '
              f'{tries} times with interval of {sleep} seconds but the '
              'health warning didn\'t turn up.')

    with safe_while(sleep=sleep, tries=tries, action=errmsg) as proceed:
        while proceed():
            self.get_ceph_cmd_stdout(
                f'tell mds.{active_mds_id} cache status')

            health_report = json.loads(self.get_ceph_cmd_stdout(
                'health detail --format json'))

            if health_warn in health_report['checks']:
                return

def gen_health_warn_mds_trim(self):
    '''
    Provoke the health warning MDS_TRIM and wait until it is reported.
    '''
    health_warn = 'MDS_TRIM'

    # for generating health warning MDS_TRIM
    self.config_set('mds', 'mds_debug_subtrees', 'true')
    # this will really really slow the trimming, so that MDS_TRIM stays
    # for longer.
    self.config_set('mds', 'mds_log_trim_decay_rate', '60')
    self.config_set('mds', 'mds_log_trim_threshold', '1')
    # presumably opens 400 files under "." in the background - TODO confirm
    self.mount_a.open_n_background('.', 400)

    # NOTE(review): 30 is assumed to be a timeout in seconds - confirm
    # against wait_for_health().
    self.wait_for_health(health_warn, 30)
class TestMdsLastSeen(CephFSTestCase):
    """
    Tests for `mds last-seen` command.
    """

    MDSS_REQUIRED = 2

    def _parse_last_seen(self, text):
        """
        Return the number of seconds encoded in ``text`` as an int.

        ``text`` is expected to look like "<digits>s". Anything else fails
        the test with the offending value in the message instead of an
        AttributeError from calling .group() on a failed match.
        """
        matched = re.match(r"^(\d+)s$", text)
        self.assertIsNotNone(matched,
                             f"unexpected last-seen value: {text!r}")
        return int(matched.group(1))

    def _last_seen_json(self, name):
        """
        Run `mds last-seen <name>` with JSON output and return the value of
        its 'last-seen' field in seconds.

        Any CommandFailedError from the command propagates to the caller.
        """
        out = self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen",
                                       name)
        return self._parse_last_seen(json.loads(out)['last-seen'])

    def test_in_text(self):
        """
        That `mds last-seen` returns 0 for an MDS currently in the map
        (plain text output).
        """

        status = self.fs.status()
        r0 = self.fs.get_rank(0, status=status)
        s = self.get_ceph_cmd_stdout("mds", "last-seen", r0['name'])
        self.assertEqual(self._parse_last_seen(s), 0)

    def test_in_json(self):
        """
        That `mds last-seen` returns 0 for an MDS currently in the map
        (JSON output).
        """

        status = self.fs.status()
        r0 = self.fs.get_rank(0, status=status)
        self.assertEqual(self._last_seen_json(r0['name']), 0)

    def test_unknown(self):
        """
        That `mds last-seen` returns ENOENT for an mds not in recent maps.
        """

        try:
            self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", 'foo')
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            self.fail("non-existent mds should fail ENOENT")

    def test_standby(self):
        """
        That `mds last-seen` returns 0 for a standby.
        """

        status = self.fs.status()
        for info in status.get_standbys():
            self.assertEqual(self._last_seen_json(info['name']), 0)

    def test_stopped(self):
        """
        That `mds last-seen` returns >0 for mds that is stopped.
        """

        status = self.fs.status()
        r0 = self.fs.get_rank(0, status=status)
        self.fs.mds_stop(mds_id=r0['name'])
        self.fs.rank_fail()
        sleep(2)
        with safe_while(sleep=1, tries=self.fs.beacon_timeout, action='wait for last-seen >0') as proceed:
            while proceed():
                seconds = self._last_seen_json(r0['name'])
                # Keep polling while the daemon is still reported as
                # seen just now.
                if seconds > 0:
                    # After the sleep(2) above the first non-zero value
                    # must already exceed one second.
                    self.assertGreater(seconds, 1)
                    break

    def test_gc(self):
        """
        That historical mds information is eventually garbage collected.
        """

        prune_time = 20
        sleep_time = 2
        self.config_set('mon', 'mon_fsmap_prune_threshold', prune_time)
        status = self.fs.status()
        r0 = self.fs.get_rank(0, status=status)
        self.fs.mds_stop(mds_id=r0['name'])
        self.fs.rank_fail()
        last = 0
        # prune_time iterations of sleep_time seconds each, i.e. we wait
        # up to twice prune_time in total.
        for _ in range(prune_time):
            sleep(sleep_time)
            try:
                seconds = self._last_seen_json(r0['name'])
            except CommandFailedError as e:
                # The entry is gone: it must have vanished with ENOENT and
                # not noticeably before the prune threshold elapsed.
                self.assertEqual(e.exitstatus, errno.ENOENT)
                self.assertGreaterEqual(last + sleep_time + 1, prune_time) # rounding error add 1
                return
            # Until it is pruned, the age must grow on every poll.
            self.assertGreater(seconds, last)
            log.debug("last_seen: %ds", seconds)
            last = seconds
        self.fail("map was no garbage collected as expected")
+
@classhook('_add_valid_tell')
class TestValidTell(TestAdminCommands):
@classmethod