From 5b8020522e6da8cc799dfda96a252af85b18308a Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Mon, 26 Apr 2021 16:24:31 +0800
Subject: [PATCH] qa: use the pg autoscale mode to calculate the pg_num

Setting the pg_num to 8 is too small: some OSDs may not be covered by
the pools at all, while others may be overloaded. Remove the hardcoded
pg_num here and let the pg autoscale mode calculate it as needed, and
at the same time set pg_num_min to 64 to keep the pg_num from being
scaled too low.

If an EC pool is used, most data in the test cases will go to the EC
pool and the primary replicated pool will only store a small amount of
metadata for all the files, so setting its target size ratio to 0.05
should be enough.

Fixes: https://tracker.ceph.com/issues/45434
Signed-off-by: Xiubo Li
(cherry picked from commit c1cea71299300ed2fd2a34d5c10a1948f3016031)

Cherry-pick notes:
- Fixed "Invalid command: missing required parameter pg_num()"
---
 qa/tasks/cephfs/filesystem.py     | 40 ++++++++++++++++++++++++-------
 qa/tasks/cephfs/test_admin.py     |  5 ++--
 qa/tasks/cephfs/test_data_scan.py |  2 +-
 qa/tasks/cephfs/test_misc.py      |  4 ++--
 4 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index ce91435b5bf47..f48ff146ede4a 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -634,9 +634,14 @@ class Filesystem(MDSCluster):
         c = ["fs", "required_client_features", self.name, *args]
         return self.mon_manager.run_cluster_cmd(args=c, **kwargs)
 
-    # In Octopus+, the PG count can be omitted to use the default. We keep the
-    # hard-coded value for deployments of Mimic/Nautilus.
-    pgs_per_fs_pool = 8
+    # Since v15.1.0 the pg autoscale mode has been enabled by default,
+    # so let the pg autoscale mode calculate the pg_num as needed.
+    # We set pg_num_min to 64 to make sure that the pg autoscale mode
+    # won't set the pg_num too low, to fix Tracker#45434.
+    pg_num = 64
+    pg_num_min = 64
+    target_size_ratio = 0.9
+    target_size_ratio_ec = 0.9
 
     def create(self):
         if self.name is None:
@@ -648,13 +653,22 @@ class Filesystem(MDSCluster):
         else:
             data_pool_name = self.data_pool_name
 
+        # The EC pool will store the data, while a small amount of
+        # metadata for all files still goes to the primary data pool.
+        if not self.metadata_overlay and self.ec_profile and 'disabled' not in self.ec_profile:
+            self.target_size_ratio = 0.05
+
         log.debug("Creating filesystem '{0}'".format(self.name))
 
         self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
-                                         self.metadata_pool_name, self.pgs_per_fs_pool.__str__())
+                                         self.metadata_pool_name,
+                                         str(self.pg_num),
+                                         '--pg_num_min', str(self.pg_num_min))
 
         self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
-                                         data_pool_name, self.pgs_per_fs_pool.__str__())
+                                         data_pool_name, str(self.pg_num),
+                                         '--pg_num_min', str(self.pg_num_min),
+                                         '--target_size_ratio',
+                                         str(self.target_size_ratio))
 
         if self.metadata_overlay:
             self.mon_manager.raw_cluster_cmd('fs', 'new',
@@ -673,9 +687,10 @@ class Filesystem(MDSCluster):
             cmd.extend(self.ec_profile)
             self.mon_manager.raw_cluster_cmd(*cmd)
             self.mon_manager.raw_cluster_cmd(
-                'osd', 'pool', 'create',
-                ec_data_pool_name, self.pgs_per_fs_pool.__str__(), 'erasure',
-                ec_data_pool_name)
+                'osd', 'pool', 'create', ec_data_pool_name,
+                'erasure', ec_data_pool_name,
+                '--pg_num_min', str(self.pg_num_min),
+                '--target_size_ratio', str(self.target_size_ratio_ec))
             self.mon_manager.raw_cluster_cmd(
                 'osd', 'pool', 'set',
                 ec_data_pool_name, 'allow_ec_overwrites', 'true')
@@ -829,7 +844,8 @@ class Filesystem(MDSCluster):
 
     def add_data_pool(self, name, create=True):
         if create:
-            self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, self.pgs_per_fs_pool.__str__())
+            self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name,
+                                             '--pg_num_min', str(self.pg_num_min))
         self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name)
         self.get_pool_names(refresh = True)
         for poolid, fs_name in self.data_pools.items():
@@ -882,6 +898,12 @@ class Filesystem(MDSCluster):
             raise RuntimeError("can't set filesystem name if its fscid is set")
         self.data_pool_name = name
 
+    def get_pool_pg_num(self, pool_name):
+        pgs = json.loads(self.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
+                                                          pool_name, 'pg_num',
+                                                          '--format=json-pretty'))
+        return int(pgs['pg_num'])
+
     def get_namespace_id(self):
         return self.id
 
diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py
index 2e71c06b1e2c2..25fe5c4992f7d 100644
--- a/qa/tasks/cephfs/test_admin.py
+++ b/qa/tasks/cephfs/test_admin.py
@@ -88,7 +88,8 @@ class TestAdminCommands(CephFSTestCase):
         """
         pool_name = "foo"
         mon_cmd = self.fs.mon_manager.raw_cluster_cmd
-        mon_cmd('osd', 'pool', 'create', pool_name, str(self.fs.pgs_per_fs_pool))
+        mon_cmd('osd', 'pool', 'create', pool_name, '--pg_num_min',
+                str(self.fs.pg_num_min))
         # Check whether https://tracker.ceph.com/issues/43061 is fixed
         mon_cmd('osd', 'pool', 'application', 'enable', pool_name, 'cephfs')
         self.fs.add_data_pool(pool_name, create=False)
@@ -191,7 +192,7 @@ class TestAdminCommands(CephFSTestCase):
         pool_names = [fs_name+'-'+key for key in keys]
         mon_cmd = self.fs.mon_manager.raw_cluster_cmd
         for p in pool_names:
-            mon_cmd('osd', 'pool', 'create', p, str(self.fs.pgs_per_fs_pool))
+            mon_cmd('osd', 'pool', 'create', p, '--pg_num_min', str(self.fs.pg_num_min))
             mon_cmd('osd', 'pool', 'application', 'enable', p, 'cephfs')
         mon_cmd('fs', 'new', fs_name, pool_names[0], pool_names[1])
         for i in range(2):
diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py
index 2aa7398ec6f63..dcb7eda40a56d 100644
--- a/qa/tasks/cephfs/test_data_scan.py
+++ b/qa/tasks/cephfs/test_data_scan.py
@@ -545,7 +545,7 @@ class TestDataScan(CephFSTestCase):
                 pgs_to_files[pgid].append(file_path)
                 log.info("{0}: {1}".format(file_path, pgid))
 
-        pg_count = self.fs.pgs_per_fs_pool
+        pg_count = self.fs.get_pool_pg_num(self.fs.get_data_pool_name())
         for pg_n in range(0, pg_count):
             pg_str = "{0}.{1:x}".format(self.fs.get_data_pool_id(), pg_n)
             out = self.fs.data_scan(["pg_files", "mydir", pg_str])
diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py
index cf3b1a1bf8cd8..a6cb3da4efce9 100644
--- a/qa/tasks/cephfs/test_misc.py
+++ b/qa/tasks/cephfs/test_misc.py
@@ -82,7 +82,7 @@ class TestMisc(CephFSTestCase):
                                             '--yes-i-really-really-mean-it')
         self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
                                             self.fs.metadata_pool_name,
-                                            self.fs.pgs_per_fs_pool.__str__())
+                                            '--pg_num_min', str(self.fs.pg_num_min))
 
         # insert a garbage object
         self.fs.radosm(["put", "foo", "-"], stdin=StringIO("bar"))
@@ -119,7 +119,7 @@ class TestMisc(CephFSTestCase):
                                             '--yes-i-really-really-mean-it')
         self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
                                             self.fs.metadata_pool_name,
-                                            self.fs.pgs_per_fs_pool.__str__())
+                                            '--pg_num_min', str(self.fs.pg_num_min))
         self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
                                             self.fs.metadata_pool_name,
                                             data_pool_name)
-- 
2.39.5
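
The two knobs this patch leans on are `--pg_num_min` (a hard floor the
autoscaler may not go below) and `--target_size_ratio` (a capacity hint
so the pool is sized before any data lands in it). The same flow the
patch gives Filesystem.create() can be reproduced outside the qa
framework with the bare `ceph` CLI. A minimal sketch follows; the pool
names and the `ceph()` subprocess wrapper are illustrative assumptions,
and a reachable test cluster with a `ceph` binary on PATH is presumed:

    import subprocess

    def ceph(*args):
        """Run a `ceph` CLI command and return its stdout."""
        return subprocess.check_output(('ceph',) + args, text=True)

    PG_NUM = 64              # initial value; the autoscaler adjusts it later
    PG_NUM_MIN = 64          # floor below which the autoscaler must not go
    TARGET_SIZE_RATIO = 0.9  # expected share of capacity (the patch drops
                             # this to 0.05 when an EC pool holds the data)

    # Metadata pool: pass an initial pg_num (still required on this branch,
    # per the cherry-pick note) plus the pg_num_min floor.
    ceph('osd', 'pool', 'create', 'cephfs.meta', str(PG_NUM),
         '--pg_num_min', str(PG_NUM_MIN))

    # Data pool: same floor, plus the target_size_ratio hint for the
    # autoscaler.
    ceph('osd', 'pool', 'create', 'cephfs.data', str(PG_NUM),
         '--pg_num_min', str(PG_NUM_MIN),
         '--target_size_ratio', str(TARGET_SIZE_RATIO))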
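
Because the autoscaler now owns pg_num, tests can no longer assume the
old hard-coded value of 8; they have to ask the cluster, which is what
the new Filesystem.get_pool_pg_num() helper does via `osd pool get`.
A standalone sketch of the same check, under the same assumptions as
above:

    import json
    import subprocess

    def get_pool_pg_num(pool_name):
        """Read back the pg_num the autoscaler actually chose for a pool."""
        out = subprocess.check_output(
            ('ceph', 'osd', 'pool', 'get', pool_name, 'pg_num',
             '--format=json-pretty'), text=True)
        return int(json.loads(out)['pg_num'])

    # The pg_num_min floor set at creation guarantees this never drops
    # below 64 ('cephfs.data' is the illustrative pool name from above).
    pg_count = get_pool_pg_num('cephfs.data')
    assert pg_count >= 64, "pg autoscaler went below pg_num_min: %d" % pg_count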