From: Connor Fawcett Date: Fri, 10 Oct 2025 11:28:31 +0000 (+0100) Subject: qa/erasure-code: Teach the OSDThrasher to enable allow_ec_optimizations on pools X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fheads%2Fteuthology-optimizations-off-on;p=ceph-ci.git qa/erasure-code: Teach the OSDThrasher to enable allow_ec_optimizations on pools --- diff --git a/qa/suites/rados/thrash-erasure-code-big/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-big/ec_optimizations/ec_optimizations_off_then_on.yaml new file mode 100644 index 00000000000..c61dd038d96 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/ec_optimizations/ec_optimizations_off_then_on.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + osd_pool_default_flag_ec_optimizations: false + thrashosds: + ec_optimizations_off_then_on: true diff --git a/qa/suites/rados/thrash-erasure-code-crush-4-nodes/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-crush-4-nodes/ec_optimizations/ec_optimizations_off_then_on.yaml new file mode 100644 index 00000000000..c61dd038d96 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-crush-4-nodes/ec_optimizations/ec_optimizations_off_then_on.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + osd_pool_default_flag_ec_optimizations: false + thrashosds: + ec_optimizations_off_then_on: true diff --git a/qa/suites/rados/thrash-erasure-code-isa/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-isa/ec_optimizations/ec_optimizations_off_then_on.yaml new file mode 100644 index 00000000000..c61dd038d96 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/ec_optimizations/ec_optimizations_off_then_on.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + enable experimental 
unrecoverable data corrupting features: '*' + osd_pool_default_flag_ec_optimizations: false + thrashosds: + ec_optimizations_off_then_on: true diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/ec_optimizations/ec_optimizations_off_then_on.yaml new file mode 100644 index 00000000000..c61dd038d96 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/ec_optimizations/ec_optimizations_off_then_on.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + osd_pool_default_flag_ec_optimizations: false + thrashosds: + ec_optimizations_off_then_on: true diff --git a/qa/suites/rados/thrash-erasure-code-shec/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-shec/ec_optimizations/ec_optimizations_off_then_on.yaml new file mode 100644 index 00000000000..c61dd038d96 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/ec_optimizations/ec_optimizations_off_then_on.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + osd_pool_default_flag_ec_optimizations: false + thrashosds: + ec_optimizations_off_then_on: true diff --git a/qa/suites/rados/thrash-erasure-code/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code/ec_optimizations/ec_optimizations_off_then_on.yaml new file mode 100644 index 00000000000..cd95c465e40 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/ec_optimizations/ec_optimizations_off_then_on.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + osd_pool_default_flag_ec_optimizations: false + ec_optimizations_on_then_off: true + thrashosds: + ec_optimizations_off_then_on: true diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index 
0f7e92c5c2f..18289857e07 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -231,6 +231,7 @@ class OSDThrasher(Thrasher): self.optrack_toggle_delay = self.config.get('optrack_toggle_delay') self.dump_ops_enable = self.config.get('dump_ops_enable') self.noscrub_toggle_delay = self.config.get('noscrub_toggle_delay') + self.enable_ec_opts_delay = self.config.get('enable_ec_opts_delay', random.uniform(300, 900)) self.chance_thrash_cluster_full = self.config.get('chance_thrash_cluster_full', .05) self.chance_thrash_pg_upmap = self.config.get('chance_thrash_pg_upmap', 1.0) self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap', 1.0) @@ -238,6 +239,7 @@ class OSDThrasher(Thrasher): self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3) self.chance_reset_purged_snaps_last = self.config.get('chance_reset_purged_snaps_last', 0.3) self.chance_trim_stale_osdmaps = self.config.get('chance_trim_stale_osdmaps', 0.3) + self.enable_ec_opts = self.config.get('ec_optimizations_off_then_on', False) num_osds = self.in_osds + self.out_osds self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds) @@ -283,6 +285,11 @@ class OSDThrasher(Thrasher): self.dump_ops_thread = gevent.spawn(self.do_dump_ops) if self.noscrub_toggle_delay: self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle) + if self.enable_ec_opts_delay: + # need delay to let some objects be written before enabling opts + self.log("enable_ec_opts detected by thrasher") + self.enable_ec_opts_thread = gevent.spawn_later(self.enable_ec_opts_delay, + self.do_enable_ec_opts) def log(self, msg, *args, **kwargs): self.logger.info(msg, *args, **kwargs) @@ -893,6 +900,9 @@ class OSDThrasher(Thrasher): if self.noscrub_toggle_delay: self.log("joining the do_noscrub_toggle greenlet") self.noscrub_toggle_thread.join() + if self.enable_ec_opts_delay: + self.log("joining the do_enable_ec_opts greenlet") + self.enable_ec_opts_thread.join() def 
# Patch context sharing this collapsed line (incomplete fragments, kept as-is):
#   ... stop_and_join(self): """
#   @@ -1459,6 +1469,22 @@ class OSDThrasher(Thrasher):
#       self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub')
#       self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub')
@log_exc
def do_enable_ec_opts(self):
    """
    Enable allow_ec_optimizations on every EC pool that has it disabled.

    Runs once, from the greenlet the thrasher schedules with
    gevent.spawn_later after enable_ec_opts_delay seconds, so that some
    objects are written before optimizations are switched on.

    Pools are handled independently: a pool whose property cannot be
    read (the query is expected to fail for non-EC pools) or that
    already reports optimizations enabled is logged and skipped, so one
    such pool does not stop the remaining pools from being processed.
    """
    self.log('Inside do_enable_ec_opts')

    # The greenlet is scheduled whenever enable_ec_opts_delay is truthy,
    # and that delay defaults to a random non-zero value. Only act when
    # the 'ec_optimizations_off_then_on' option was actually requested.
    if not self.enable_ec_opts:
        self.log('do_enable_ec_opts: ec_optimizations_off_then_on not set, nothing to do')
        return

    for pool in self.ceph_manager.list_pools():
        try:
            # Bound-method call: do not pass the thrasher as an extra argument.
            opts_enabled = self.ceph_manager.get_pool_property(
                pool, 'allow_ec_optimizations')
        except Exception as e:
            # Best effort: non-EC pools are expected to return an error here.
            self.log('do_enable_ec_opts: Unable to enable ec optimizations '
                     'on pool %s, ignoring (%s)' % (pool, e))
            continue

        # Compare by value; identity ('is') on a str literal is unreliable.
        if opts_enabled == 'false':
            # NOTE(review): confirm set_pool_property accepts the string
            # 'true' for this property (it may expect an int value).
            self.ceph_manager.set_pool_property(
                pool, 'allow_ec_optimizations', 'true')
            self.log('do_enable_ec_opts: Enabled ec optimizations on pool %s' % pool)
        else:
            self.log('do_enable_ec_opts: Unable to enable ec optimizations on pool %s, ignoring' % pool)

# Patch context that follows on the same collapsed line (incomplete fragment):
#   @log_exc def _do_thrash(self): """