git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
qa/erasure-code: Teach the OSDThrasher to enable allow_ec_optimizations on pools
author Connor Fawcett <connorfa@uk.ibm.com>
Fri, 10 Oct 2025 11:28:31 +0000 (12:28 +0100)
committer Connor Fawcett <connorfa@uk.ibm.com>
Mon, 13 Oct 2025 11:11:57 +0000 (12:11 +0100)
qa/suites/rados/thrash-erasure-code-big/ec_optimizations/ec_optimizations_off_then_on.yaml [new file with mode: 0644]
qa/suites/rados/thrash-erasure-code-crush-4-nodes/ec_optimizations/ec_optimizations_off_then_on.yaml [new file with mode: 0644]
qa/suites/rados/thrash-erasure-code-isa/ec_optimizations/ec_optimizations_off_then_on.yaml [new file with mode: 0644]
qa/suites/rados/thrash-erasure-code-overwrites/ec_optimizations/ec_optimizations_off_then_on.yaml [new file with mode: 0644]
qa/suites/rados/thrash-erasure-code-shec/ec_optimizations/ec_optimizations_off_then_on.yaml [new file with mode: 0644]
qa/suites/rados/thrash-erasure-code/ec_optimizations/ec_optimizations_off_then_on.yaml [new file with mode: 0644]
qa/tasks/ceph_manager.py

diff --git a/qa/suites/rados/thrash-erasure-code-big/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-big/ec_optimizations/ec_optimizations_off_then_on.yaml
new file mode 100644 (file)
index 0000000..c61dd03
--- /dev/null
@@ -0,0 +1,8 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        enable experimental unrecoverable data corrupting features: '*'
+        osd_pool_default_flag_ec_optimizations: false  # presumably makes new pools start with EC optimizations off - confirm
+  thrashosds:
+    ec_optimizations_off_then_on: true  # OSDThrasher reads this key; when true it schedules do_enable_ec_opts after a random 300-900s delay
diff --git a/qa/suites/rados/thrash-erasure-code-crush-4-nodes/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-crush-4-nodes/ec_optimizations/ec_optimizations_off_then_on.yaml
new file mode 100644 (file)
index 0000000..c61dd03
--- /dev/null
@@ -0,0 +1,8 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        enable experimental unrecoverable data corrupting features: '*'
+        osd_pool_default_flag_ec_optimizations: false  # presumably makes new pools start with EC optimizations off - confirm
+  thrashosds:
+    ec_optimizations_off_then_on: true  # OSDThrasher reads this key; when true it schedules do_enable_ec_opts after a random 300-900s delay
diff --git a/qa/suites/rados/thrash-erasure-code-isa/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-isa/ec_optimizations/ec_optimizations_off_then_on.yaml
new file mode 100644 (file)
index 0000000..c61dd03
--- /dev/null
@@ -0,0 +1,8 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        enable experimental unrecoverable data corrupting features: '*'
+        osd_pool_default_flag_ec_optimizations: false  # presumably makes new pools start with EC optimizations off - confirm
+  thrashosds:
+    ec_optimizations_off_then_on: true  # OSDThrasher reads this key; when true it schedules do_enable_ec_opts after a random 300-900s delay
diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/ec_optimizations/ec_optimizations_off_then_on.yaml
new file mode 100644 (file)
index 0000000..c61dd03
--- /dev/null
@@ -0,0 +1,8 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        enable experimental unrecoverable data corrupting features: '*'
+        osd_pool_default_flag_ec_optimizations: false  # presumably makes new pools start with EC optimizations off - confirm
+  thrashosds:
+    ec_optimizations_off_then_on: true  # OSDThrasher reads this key; when true it schedules do_enable_ec_opts after a random 300-900s delay
diff --git a/qa/suites/rados/thrash-erasure-code-shec/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code-shec/ec_optimizations/ec_optimizations_off_then_on.yaml
new file mode 100644 (file)
index 0000000..c61dd03
--- /dev/null
@@ -0,0 +1,8 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        enable experimental unrecoverable data corrupting features: '*'
+        osd_pool_default_flag_ec_optimizations: false  # presumably makes new pools start with EC optimizations off - confirm
+  thrashosds:
+    ec_optimizations_off_then_on: true  # OSDThrasher reads this key; when true it schedules do_enable_ec_opts after a random 300-900s delay
diff --git a/qa/suites/rados/thrash-erasure-code/ec_optimizations/ec_optimizations_off_then_on.yaml b/qa/suites/rados/thrash-erasure-code/ec_optimizations/ec_optimizations_off_then_on.yaml
new file mode 100644 (file)
index 0000000..cd95c46
--- /dev/null
@@ -0,0 +1,9 @@
+# Start with EC optimizations off by default; the OSD thrasher enables them on pools later in the run.
+overrides:
+  ceph:
+    conf:
+      global:
+        enable experimental unrecoverable data corrupting features: '*'
+        osd_pool_default_flag_ec_optimizations: false  # presumably makes new pools start with EC optimizations off - confirm
+  thrashosds:
+    ec_optimizations_off_then_on: true  # OSDThrasher reads this key; when true it schedules do_enable_ec_opts after a random 300-900s delay
diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
index 0f7e92c5c2fb98af3c1dacbc43656e7fe53545e1..51671b084bafcb18d600376fafdb971c59c3e09a 100644 (file)
@@ -238,6 +238,7 @@ class OSDThrasher(Thrasher):
         self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3)
         self.chance_reset_purged_snaps_last = self.config.get('chance_reset_purged_snaps_last', 0.3)
         self.chance_trim_stale_osdmaps = self.config.get('chance_trim_stale_osdmaps', 0.3)
+        self.ec_opts_off_then_on = self.config.get('ec_optimizations_off_then_on', False)
 
         num_osds = self.in_osds + self.out_osds
         self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds)
@@ -283,6 +284,12 @@ class OSDThrasher(Thrasher):
             self.dump_ops_thread = gevent.spawn(self.do_dump_ops)
         if self.noscrub_toggle_delay:
             self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle)
+        if self.ec_opts_off_then_on:
+            # need delay to let some objects be written before enabling opts
+            self.log("ec_opts_off_then_on detected by thrasher")
+            delay = random.uniform(300, 900)
+            self.turn_on_opts_thread = gevent.spawn_later(delay,
+                                                          self.do_enable_ec_opts)
 
     def log(self, msg, *args, **kwargs):
         self.logger.info(msg, *args, **kwargs)
@@ -893,6 +900,9 @@ class OSDThrasher(Thrasher):
         if self.noscrub_toggle_delay:
             self.log("joining the do_noscrub_toggle greenlet")
             self.noscrub_toggle_thread.join()
+        if self.ec_opts_off_then_on:
+            self.log("joining the do_enable_ec_opts greenlet")
+            self.turn_on_opts_thread.join()
 
     def stop_and_join(self):
         """
@@ -1459,6 +1469,21 @@ class OSDThrasher(Thrasher):
         self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub')
         self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub')
 
+    @log_exc
+    def do_enable_ec_opts(self):
+        """
+        Loop through self.ceph_manager.pools and enable allow_ec_optimizations
+        on every pool that reports it as 'false'; other pools are only logged.
+        """
+        for pool in self.ceph_manager.pools:  # NOTE(review): assumes get/set_pool_property are ceph_manager methods - confirm
+            opts_enabled = self.ceph_manager.get_pool_property(pool, 'allow_ec_optimizations')
+            # Expected: 'true' when already enabled; non ec pools are said to return an error - TODO confirm the getter does not raise
+            if opts_enabled == 'false':  # compare by value; `is` against a str literal tests identity
+                self.ceph_manager.set_pool_property(pool, 'allow_ec_optimizations', 'true')  # NOTE(review): confirm a str value is accepted
+                self.log('Enabled ec optimizations on pool %s' % pool)
+            else:
+                self.log('Unable to enable ec optimizations on pool %s, ignoring' % pool)
+
     @log_exc
     def _do_thrash(self):
         """