git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Merge pull request #58562 from rishabh-d-dave/wip-66935-squid
author Xiubo Li <xiubli@redhat.com>
Mon, 22 Jul 2024 05:51:48 +0000 (13:51 +0800)
committer GitHub <noreply@github.com>
Mon, 22 Jul 2024 05:51:48 +0000 (13:51 +0800)
squid: qa/cephfs: improvements for "mds fail" and "fs fail"

1  2 
qa/tasks/cephfs/test_admin.py

index 2827edc75a37a3f4855232ef158d911ef3090def,d609ee6710576e5f0382d4d77bababda20653ca8..90726016dc99b193cab2b6971c30a971914563fb
@@@ -165,140 -91,29 +165,131 @@@ class TestAdminCommands(CephFSTestCase)
          if overwrites:
              self.run_ceph_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true')
  
-     def _get_unhealthy_mds_id(self, health_report, health_warn):
-         '''
-         Return MDS ID for which health warning in "health_warn" has been
-         generated.
-         '''
-         # variable "msg" should hold string something like this -
-         # 'mds.b(mds.0): Behind on trimming (865/10) max_segments: 10,
-         # num_segments: 86
-         msg = health_report['checks'][health_warn]['detail'][0]['message']
-         mds_id = msg.split('(')[0]
-         mds_id = mds_id.replace('mds.', '')
-         return mds_id
+     def gen_health_warn_mds_cache_oversized(self):
+         health_warn = 'MDS_CACHE_OVERSIZED'
  
-     def wait_till_health_warn(self, health_warn, active_mds_id, sleep=3,
-                               tries=10):
-         errmsg = (f'Expected health warning "{health_warn}" to eventually '
-                   'show up in output of command "ceph health detail". Tried '
-                   f'{tries} times with interval of {sleep} seconds but the '
-                   'health warning didn\'t turn up.')
+         self.config_set('mds', 'mds_cache_memory_limit', '1K')
+         self.config_set('mds', 'mds_health_cache_threshold', '1.00000')
+         self.mount_a.open_n_background('.', 400)
  
-         with safe_while(sleep=sleep, tries=tries, action=errmsg) as proceed:
-             while proceed():
-                 self.get_ceph_cmd_stdout(
-                     f'tell mds.{active_mds_id} cache status')
+         self.wait_for_health(health_warn, 30)
  
-                 health_report = json.loads(self.get_ceph_cmd_stdout(
-                     'health detail --format json'))
+     def gen_health_warn_mds_trim(self):
+         health_warn = 'MDS_TRIM'
+         # for generating health warning MDS_TRIM
+         self.config_set('mds', 'mds_debug_subtrees', 'true')
+         # this will really really slow the trimming, so that MDS_TRIM stays
+         # for longer.
+         self.config_set('mds', 'mds_log_trim_decay_rate', '60')
+         self.config_set('mds', 'mds_log_trim_threshold', '1')
+         self.mount_a.open_n_background('.', 400)
  
-                 if health_warn in health_report['checks']:
-                     return
+         self.wait_for_health(health_warn, 30)
  
  
 +class TestMdsLastSeen(CephFSTestCase):
 +    """
 +    Tests for `mds last-seen` command.
 +    """
 +
 +    MDSS_REQUIRED = 2
 +
 +    def test_in_text(self):
 +        """
 +        That `mds last-seen` returns 0 for an MDS currently in the map.
 +        """
 +
 +        status = self.fs.status()
 +        r0 = self.fs.get_rank(0, status=status)
 +        s = self.get_ceph_cmd_stdout("mds", "last-seen", r0['name'])
 +        seconds = int(re.match(r"^(\d+)s$", s).group(1))
 +        self.assertEqual(seconds, 0)
 +
 +    def test_in_json(self):
 +        """
 +        That `mds last-seen` returns 0 for an MDS currently in the map.
 +        """
 +
 +        status = self.fs.status()
 +        r0 = self.fs.get_rank(0, status=status)
 +        s = self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", r0['name'])
 +        J = json.loads(s)
 +        seconds = int(re.match(r"^(\d+)s$", J['last-seen']).group(1))
 +        self.assertEqual(seconds, 0)
 +
 +    def test_unknown(self):
 +        """
 +        That `mds last-seen` returns ENOENT for an mds not in recent maps.
 +        """
 +
 +        try:
 +            self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", 'foo')
 +        except CommandFailedError as e:
 +            self.assertEqual(e.exitstatus, errno.ENOENT)
 +        else:
 +            self.fail("non-existent mds should fail ENOENT")
 +
 +    def test_standby(self):
 +        """
 +        That `mds last-seen` returns 0 for a standby.
 +        """
 +
 +        status = self.fs.status()
 +        for info in status.get_standbys():
 +            s = self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", info['name'])
 +            J = json.loads(s)
 +            seconds = int(re.match(r"^(\d+)s$", J['last-seen']).group(1))
 +            self.assertEqual(seconds, 0)
 +
 +    def test_stopped(self):
 +        """
 +        That `mds last-seen` returns >0 for mds that is stopped.
 +        """
 +
 +        status = self.fs.status()
 +        r0 = self.fs.get_rank(0, status=status)
 +        self.fs.mds_stop(mds_id=r0['name'])
 +        self.fs.rank_fail()
 +        sleep(2)
 +        with safe_while(sleep=1, tries=self.fs.beacon_timeout, action='wait for last-seen >0') as proceed:
 +            while proceed():
 +                s = self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", r0['name'])
 +                J = json.loads(s)
 +                seconds = int(re.match(r"^(\d+)s$", J['last-seen']).group(1))
 +                if seconds == 0:
 +                    continue
 +                self.assertGreater(seconds, 1)
 +                break
 +
 +    def test_gc(self):
 +        """
 +        That historical mds information is eventually garbage collected.
 +        """
 +
 +        prune_time = 20
 +        sleep_time = 2
 +        self.config_set('mon', 'mon_fsmap_prune_threshold', prune_time)
 +        status = self.fs.status()
 +        r0 = self.fs.get_rank(0, status=status)
 +        self.fs.mds_stop(mds_id=r0['name'])
 +        self.fs.rank_fail()
 +        last = 0
 +        for i in range(prune_time):
 +            sleep(sleep_time) # we will sleep twice prune_time
 +            try:
 +                s = self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", r0['name'])
 +                J = json.loads(s)
 +                seconds = int(re.match(r"^(\d+)s$", J['last-seen']).group(1))
 +                self.assertGreater(seconds, last)
 +                log.debug("last_seen: %ds", seconds)
 +                last = seconds
 +            except CommandFailedError as e:
 +                self.assertEqual(e.exitstatus, errno.ENOENT)
 +                self.assertGreaterEqual(last + sleep_time + 1, prune_time) # rounding error add 1
 +                return
 +        self.fail("map was no garbage collected as expected")
 +
  @classhook('_add_valid_tell')
  class TestValidTell(TestAdminCommands):
      @classmethod