From c1cea71299300ed2fd2a34d5c10a1948f3016031 Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Mon, 26 Apr 2021 16:24:31 +0800
Subject: [PATCH] qa: use the pg autoscale mode to calculate the pg_num

Setting pg_num to 8 is too small: some OSDs may not be covered by the
pools, while others may be overloaded. Remove the hard-coded pg_num
here and let the pg autoscale mode calculate it as needed, and at the
same time set pg_num_min to 64 to keep the pg_num from going too low.

If an EC pool is used, most data in the test cases will go to the EC
pool and the primary replicated pool will only store a small amount of
metadata for all the files, so a target size ratio of 0.05 should be
enough.

Fixes: https://tracker.ceph.com/issues/45434
Signed-off-by: Xiubo Li
---
 qa/tasks/cephfs/filesystem.py     | 40 ++++++++++++++++++++++++-------
 qa/tasks/cephfs/test_admin.py     |  5 ++--
 qa/tasks/cephfs/test_data_scan.py |  2 +-
 qa/tasks/cephfs/test_misc.py      |  4 ++--
 4 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index a84f2bbd57b17..805b7548dc60f 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -583,9 +583,14 @@ class Filesystem(MDSCluster):
         c = ["fs", "required_client_features", self.name, *args]
         return self.mon_manager.run_cluster_cmd(args=c, **kwargs)
 
-    # In Octopus+, the PG count can be omitted to use the default. We keep the
-    # hard-coded value for deployments of Mimic/Nautilus.
-    pgs_per_fs_pool = 8
+    # Since v15.1.0 the pg autoscale mode has been enabled by default,
+    # so let it calculate the pg_num as needed. We set pg_num_min to
+    # 64 to make sure the autoscaler won't set the pg_num too low.
+    # See https://tracker.ceph.com/issues/45434.
+    pg_num = 64
+    pg_num_min = 64
+    target_size_ratio = 0.9
+    target_size_ratio_ec = 0.9
 
     def create(self):
         if self.name is None:
@@ -597,13 +602,22 @@ class Filesystem(MDSCluster):
         else:
             data_pool_name = self.data_pool_name
 
+        # The EC pool will store the data; only a small amount of
+        # metadata for all the files goes to the primary data pool.
+        if not self.metadata_overlay and self.ec_profile and 'disabled' not in self.ec_profile:
+            self.target_size_ratio = 0.05
+
         log.debug("Creating filesystem '{0}'".format(self.name))
 
         self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
-                                         self.metadata_pool_name, self.pgs_per_fs_pool.__str__())
+                                         self.metadata_pool_name,
+                                         '--pg_num_min', str(self.pg_num_min))
 
         self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
-                                         data_pool_name, self.pgs_per_fs_pool.__str__())
+                                         data_pool_name, str(self.pg_num),
+                                         '--pg_num_min', str(self.pg_num_min),
+                                         '--target_size_ratio',
+                                         str(self.target_size_ratio))
 
         if self.metadata_overlay:
             self.mon_manager.raw_cluster_cmd('fs', 'new',
@@ -622,9 +636,10 @@ class Filesystem(MDSCluster):
             cmd.extend(self.ec_profile)
             self.mon_manager.raw_cluster_cmd(*cmd)
             self.mon_manager.raw_cluster_cmd(
-                'osd', 'pool', 'create',
-                ec_data_pool_name, self.pgs_per_fs_pool.__str__(), 'erasure',
-                ec_data_pool_name)
+                'osd', 'pool', 'create', ec_data_pool_name,
+                'erasure', ec_data_pool_name,
+                '--pg_num_min', str(self.pg_num_min),
+                '--target_size_ratio', str(self.target_size_ratio_ec))
             self.mon_manager.raw_cluster_cmd(
                 'osd', 'pool', 'set',
                 ec_data_pool_name, 'allow_ec_overwrites', 'true')
@@ -778,7 +793,8 @@ class Filesystem(MDSCluster):
 
     def add_data_pool(self, name, create=True):
         if create:
-            self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, self.pgs_per_fs_pool.__str__())
+            self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name,
+                                             '--pg_num_min', str(self.pg_num_min))
         self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name)
         self.get_pool_names(refresh = True)
         for poolid, fs_name in self.data_pools.items():
@@ -831,6 +847,12 @@ class Filesystem(MDSCluster):
             raise RuntimeError("can't set filesystem name if its fscid is set")
         self.data_pool_name = name
 
+    def get_pool_pg_num(self, pool_name):
+        pgs = json.loads(self.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
+                                                          pool_name, 'pg_num',
+                                                          '--format=json-pretty'))
+        return int(pgs['pg_num'])
+
     def get_namespace_id(self):
         return self.id
 
diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py
index 03b477c3e3d6e..1036197a49fb4 100644
--- a/qa/tasks/cephfs/test_admin.py
+++ b/qa/tasks/cephfs/test_admin.py
@@ -74,7 +74,8 @@ class TestAddDataPool(TestAdminCommands):
         """
         pool_name = "foo"
         mon_cmd = self.fs.mon_manager.raw_cluster_cmd
-        mon_cmd('osd', 'pool', 'create', pool_name, str(self.fs.pgs_per_fs_pool))
+        mon_cmd('osd', 'pool', 'create', pool_name, '--pg_num_min',
+                str(self.fs.pg_num_min))
         # Check whether https://tracker.ceph.com/issues/43061 is fixed
         mon_cmd('osd', 'pool', 'application', 'enable', pool_name, 'cephfs')
         self.fs.add_data_pool(pool_name, create=False)
@@ -207,7 +208,7 @@ class TestFsNew(TestAdminCommands):
         pool_names = [fs_name+'-'+key for key in keys]
         mon_cmd = self.fs.mon_manager.raw_cluster_cmd
         for p in pool_names:
-            mon_cmd('osd', 'pool', 'create', p, str(self.fs.pgs_per_fs_pool))
+            mon_cmd('osd', 'pool', 'create', p, '--pg_num_min', str(self.fs.pg_num_min))
             mon_cmd('osd', 'pool', 'application', 'enable', p, 'cephfs')
         mon_cmd('fs', 'new', fs_name, pool_names[0], pool_names[1])
         for i in range(2):
diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py
index 2aa7398ec6f63..dcb7eda40a56d 100644
--- a/qa/tasks/cephfs/test_data_scan.py
+++ b/qa/tasks/cephfs/test_data_scan.py
@@ -545,7 +545,7 @@ class TestDataScan(CephFSTestCase):
                 pgs_to_files[pgid].append(file_path)
                 log.info("{0}: {1}".format(file_path, pgid))
 
-        pg_count = self.fs.pgs_per_fs_pool
+        pg_count = self.fs.get_pool_pg_num(self.fs.get_data_pool_name())
         for pg_n in range(0, pg_count):
             pg_str = "{0}.{1:x}".format(self.fs.get_data_pool_id(), pg_n)
             out = self.fs.data_scan(["pg_files", "mydir", pg_str])
diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py
index 40bb8fb1d9d75..85b11cc6ca045 100644
--- a/qa/tasks/cephfs/test_misc.py
+++ b/qa/tasks/cephfs/test_misc.py
@@ -82,7 +82,7 @@ class TestMisc(CephFSTestCase):
                                             '--yes-i-really-really-mean-it')
         self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
                                             self.fs.metadata_pool_name,
-                                            self.fs.pgs_per_fs_pool.__str__())
+                                            '--pg_num_min', str(self.fs.pg_num_min))
 
         # insert a garbage object
         self.fs.radosm(["put", "foo", "-"], stdin=StringIO("bar"))
@@ -119,7 +119,7 @@ class TestMisc(CephFSTestCase):
                                             '--yes-i-really-really-mean-it')
         self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
                                             self.fs.metadata_pool_name,
-                                            self.fs.pgs_per_fs_pool.__str__())
+                                            '--pg_num_min', str(self.fs.pg_num_min))
         self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
                                             self.fs.metadata_pool_name,
                                             data_pool_name)
-- 
2.39.5
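
For reference, a minimal standalone sketch of the autoscaler behavior
this patch relies on. It assumes a running test cluster with the `ceph`
CLI on PATH and the pg autoscaler left at its default "on" mode; the
pool name "qa-scratch" is made up for illustration, and the knobs are
the same ones the patch passes through raw_cluster_cmd():

    import json
    import subprocess

    def ceph(*args):
        # Run a ceph CLI command and return its stdout as text.
        return subprocess.check_output(('ceph',) + args, text=True)

    # Create a pool with a floor of 64 PGs and let the autoscaler pick
    # the actual pg_num (mirrors pg_num_min = 64 in Filesystem.create()).
    ceph('osd', 'pool', 'create', 'qa-scratch', '--pg_num_min', '64')

    # Hint that the pool will hold roughly 5% of the cluster's data, as
    # the patch does for the primary data pool when an EC pool holds the
    # bulk of the file data.
    ceph('osd', 'pool', 'set', 'qa-scratch', 'target_size_ratio', '0.05')

    # Read back whatever pg_num the autoscaler settled on, like the new
    # Filesystem.get_pool_pg_num() helper does.
    pg_num = json.loads(ceph('osd', 'pool', 'get', 'qa-scratch', 'pg_num',
                             '--format=json'))['pg_num']
    print(pg_num)

Tests that previously iterated over the fixed pgs_per_fs_pool count now
query the live value this way, since the autoscaler may change pg_num
after pool creation.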