From: Ronen Friedman <rfriedma@redhat.com>
Date: Thu, 16 Apr 2026 18:02:21 +0000 (+0000)
Subject: qa/tasks/ceph_objectstore_tool.py: add gc_before_restart option
X-Git-Tag: v21.0.1~366^2~2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8ef4aacfcf70691b01f9e879ea3451836cd5fa86;p=ceph.git

qa/tasks/ceph_objectstore_tool.py: add gc_before_restart option

The objectstore tool tests restart the OSDs without allowing enough
time for GC to run, which can lead to no-OOL-segments conditions on
restart. This commit adds a gc_before_restart option to the test
config; when it is set to true, the test runs
crimson-objectstore-tool --op gc on each OSD before restarting them.

Fixes: https://tracker.ceph.com/issues/73101

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
---

diff --git a/qa/suites/crimson-rados/objectstore_tool/tasks/crimson_obejctstore_tool.yaml b/qa/suites/crimson-rados/objectstore_tool/tasks/crimson_obejctstore_tool.yaml
index 3640f61aede..c212c451237 100644
--- a/qa/suites/crimson-rados/objectstore_tool/tasks/crimson_obejctstore_tool.yaml
+++ b/qa/suites/crimson-rados/objectstore_tool/tasks/crimson_obejctstore_tool.yaml
@@ -11,6 +11,7 @@ overrides:
 tasks:
 - ceph_objectstore_tool:
     crimson_objectstore_tool: true
+    gc_before_restart: true
     objects: 5
     pgnum: 8
 
diff --git a/qa/tasks/ceph_objectstore_tool.py b/qa/tasks/ceph_objectstore_tool.py
index c5d86f0215f..84e6e8ca4a1 100644
--- a/qa/tasks/ceph_objectstore_tool.py
+++ b/qa/tasks/ceph_objectstore_tool.py
@@ -169,6 +169,7 @@ def task(ctx, config):
           objects: 20 # <number of objects>
           pgnum: 12
           crimson_objectstore_tool: true # use crimson-objectstore-tool instead of ceph-objectstore-tool
+          gc_before_restart: true # run crimson-objectstore-tool --op gc before restarting OSDs
     """
 
     if config is None:
@@ -702,6 +703,23 @@ def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False):
     ERRORS += IMP_ERRORS
 
     if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
+        if CRIMSON and config.get('gc_before_restart', False):
+            # Run GC on each OSD's seastore to reclaim segments consumed
+            # by repeated tool mount/unmount cycles before restarting.
+            log.info("Running GC on each OSD store...")
+            for remote in osds.remotes.keys():
+                for role in osds.remotes[remote]:
+                    if not role.startswith("osd."):
+                        continue
+                    osdid = int(role.split('.')[1])
+                    cmd = (prefix + "--op gc").format(id=osdid)
+                    try:
+                        remote.sh(cmd, wait=True)
+                    except CommandFailedError as e:
+                        log.warning(
+                            "GC failed on osd.{id} with {ret}".format(
+                                id=osdid, ret=e.exitstatus))
+
         log.info("Restarting OSDs....")
         # They are still look to be up because of setting nodown
         for osd in manager.get_osd_status()['up']: