git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/tasks/ceph_objectstore_tool.py: add gc_before_restart option
author: Ronen Friedman <rfriedma@redhat.com>
Thu, 16 Apr 2026 18:02:21 +0000 (18:02 +0000)
committer: Ronen Friedman <rfriedma@redhat.com>
Tue, 28 Apr 2026 05:11:06 +0000 (05:11 +0000)
The objectstore tool tests restart the OSDs without allowing enough
time for GC to run, which can lead to no-OOL-segments conditions on
restart. This commit adds a gc_before_restart option to the test
config which, when set to true, runs crimson-objectstore-tool --op gc
on each OSD before the OSDs are restarted.

Fixes: https://tracker.ceph.com/issues/73101
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
qa/suites/crimson-rados/objectstore_tool/tasks/crimson_obejctstore_tool.yaml
qa/tasks/ceph_objectstore_tool.py

index 3640f61aedef5d23823fc6eaac32b303244fd879..c212c451237f64880736675da5ed4cc5ef6b5cce 100644 (file)
@@ -11,6 +11,7 @@ overrides:
 tasks:
 - ceph_objectstore_tool:
     crimson_objectstore_tool: true
+    gc_before_restart: true
     objects: 5
     pgnum: 8
 
index c5d86f0215fbe41e7b884e5fcd1722449d491ee8..84e6e8ca4a1c966c1534ce095e8bc22dc749c1cd 100644 (file)
@@ -169,6 +169,7 @@ def task(ctx, config):
           objects: 20 # <number of objects>
           pgnum: 12
           crimson_objectstore_tool: true # use crimson-objectstore-tool instead of ceph-objectstore-tool
+          gc_before_restart: true # run crimson-objectstore-tool --op gc before restarting OSDs
     """
 
     if config is None:
@@ -702,6 +703,23 @@ def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False):
     ERRORS += IMP_ERRORS
 
     if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
+        if CRIMSON and config.get('gc_before_restart', False):
+            # Run GC on each OSD's seastore to reclaim segments consumed
+            # by repeated tool mount/unmount cycles before restarting.
+            log.info("Running GC on each OSD store...")
+            for remote in osds.remotes.keys():
+                for role in osds.remotes[remote]:
+                    if not role.startswith("osd."):
+                        continue
+                    osdid = int(role.split('.')[1])
+                    cmd = (prefix + "--op gc").format(id=osdid)
+                    try:
+                        remote.sh(cmd, wait=True)
+                    except CommandFailedError as e:
+                        log.warning(
+                            "GC failed on osd.{id} with {ret}".format(
+                                id=osdid, ret=e.exitstatus))
+
         log.info("Restarting OSDs....")
         # They are still look to be up because of setting nodown
         for osd in manager.get_osd_status()['up']: