From: Ronen Friedman Date: Thu, 16 Apr 2026 18:02:21 +0000 (+0000) Subject: qa/tasks/ceph_objectstore_tool.py: add gc_before_restart option X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8ef4aacfcf70691b01f9e879ea3451836cd5fa86;p=ceph.git qa/tasks/ceph_objectstore_tool.py: add gc_before_restart option The objectstore tool tests restart the OSDs without allowing enough time for GC to run, which can lead to no-OOL-segments conditions on restart. This adds a gc_before_restart option to the test config, which when set to true will run crimson-objectstore-tool --op gc on each OSD before restarting them. Fixes: https://tracker.ceph.com/issues/73101 Signed-off-by: Ronen Friedman --- diff --git a/qa/suites/crimson-rados/objectstore_tool/tasks/crimson_obejctstore_tool.yaml b/qa/suites/crimson-rados/objectstore_tool/tasks/crimson_obejctstore_tool.yaml index 3640f61aedef..c212c451237f 100644 --- a/qa/suites/crimson-rados/objectstore_tool/tasks/crimson_obejctstore_tool.yaml +++ b/qa/suites/crimson-rados/objectstore_tool/tasks/crimson_obejctstore_tool.yaml @@ -11,6 +11,7 @@ overrides: tasks: - ceph_objectstore_tool: crimson_objectstore_tool: true + gc_before_restart: true objects: 5 pgnum: 8 diff --git a/qa/tasks/ceph_objectstore_tool.py b/qa/tasks/ceph_objectstore_tool.py index c5d86f0215fb..84e6e8ca4a1c 100644 --- a/qa/tasks/ceph_objectstore_tool.py +++ b/qa/tasks/ceph_objectstore_tool.py @@ -169,6 +169,7 @@ def task(ctx, config): objects: 20 # pgnum: 12 crimson_objectstore_tool: true # use crimson-objectstore-tool instead of ceph-objectstore-tool + gc_before_restart: true # run crimson-objectstore-tool --op gc before restarting OSDs """ if config is None: @@ -702,6 +703,23 @@ def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False): ERRORS += IMP_ERRORS if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + if CRIMSON and config.get('gc_before_restart', False): + # Run GC on each OSD's seastore to reclaim segments consumed + # by repeated tool mount/unmount cycles before restarting. + log.info("Running GC on each OSD store...") + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + cmd = (prefix + "--op gc").format(id=osdid) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.warning( + "GC failed on osd.{id} with {ret}".format( + id=osdid, ret=e.exitstatus)) + log.info("Restarting OSDs....") # They are still look to be up because of setting nodown for osd in manager.get_osd_status()['up']: