From: Kefu Chai Date: Thu, 9 Apr 2026 12:37:40 +0000 (+0800) Subject: mgr/crash, mgr/status: return negative errno to fix CLI exit code X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7041f6b6b310165beee46ed45a0d34908716d159;p=ceph.git mgr/crash, mgr/status: return negative errno to fix CLI exit code The crash and status mgr modules return positive errno values (e.g. errno.EINVAL) on error. However, the ceph CLI (src/ceph.in) only treats negative return codes as errors: if ret < 0: final_ret = -ret A positive return code falls through and the CLI exits with 0, masking the error. This causes commands like "ceph crash post" and "ceph crash info " to return success (rc=0) despite failing, with the error message only appearing on stderr. This was observed in the wild and worked around in a77b47eeeb57, which added a stderr != "" check to ceph-crash's post_crash() as a mitigation. Fix the root cause by negating the errno values in both modules, consistent with the HandleCommandResult convention (retval: "E.g. 0 or -errno.EINVAL") and the 154 other call sites across mgr modules that already use negative errno. Also add error-path tests for the crash module to cover the three affected commands (info, post, archive). See-also: a77b47eeeb5770eeefcf4619ab2105ee7a6a003e Fixes: https://tracker.ceph.com/issues/75937 Signed-off-by: Kefu Chai --- diff --git a/qa/tasks/mgr/test_crash.py b/qa/tasks/mgr/test_crash.py index 49191127f24b..bc3378bebb51 100644 --- a/qa/tasks/mgr/test_crash.py +++ b/qa/tasks/mgr/test_crash.py @@ -106,3 +106,22 @@ class TestCrash(MgrTestCase): 'crash', 'ls', ) self.assertNotIn(self.oldest_crashid, retstr) + + def test_info_nonexistent(self): + rc = self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'info', 'does-not-exist', + ) + self.assertNotEqual(0, rc) + + def test_post_malformed(self): + rc = self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'post', '-i', '-', + stdin='{"not_a_valid_crash": true}', + ) + self.assertNotEqual(0, rc) + + def test_archive_nonexistent(self): + rc = self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'archive', 'does-not-exist', + ) + self.assertNotEqual(0, rc) diff --git a/src/pybind/mgr/crash/module.py b/src/pybind/mgr/crash/module.py index 9aae5c9306f8..f3f6ff7b2256 100644 --- a/src/pybind/mgr/crash/module.py +++ b/src/pybind/mgr/crash/module.py @@ -228,7 +228,7 @@ class Module(MgrModule): assert self.crashes is not None crash = self.crashes.get(crashid) if not crash: - return errno.EINVAL, '', 'crash info: %s not found' % crashid + return -errno.EINVAL, '', 'crash info: %s not found' % crashid val = json.dumps(crash, indent=4, sort_keys=True) return 0, val, '' @@ -240,7 +240,7 @@ class Module(MgrModule): try: metadata = self.validate_crash_metadata(inbuf) except Exception as e: - return errno.EINVAL, '', 'malformed crash metadata: %s' % e + return -errno.EINVAL, '', 'malformed crash metadata: %s' % e if 'backtrace' in metadata: backtrace = cast(List[str], metadata.get('backtrace')) assert_msg = cast(Optional[str], metadata.get('assert_msg')) @@ -345,7 +345,7 @@ class Module(MgrModule): assert self.crashes is not None crash = self.crashes.get(crashid) if not crash: - return errno.EINVAL, '', 'crash info: %s not found' % crashid + return -errno.EINVAL, '', 'crash info: %s not found' % crashid if not crash.get('archived'): crash['archived'] = str(datetime.datetime.utcnow()) self.crashes[crashid] = crash diff --git a/src/pybind/mgr/status/module.py b/src/pybind/mgr/status/module.py index 7e7264a762f6..bae1136a3d1b 100644 --- a/src/pybind/mgr/status/module.py +++ b/src/pybind/mgr/status/module.py @@ -226,7 +226,7 @@ class Module(MgrModule): output += "\n" + pools_table.get_string() + "\n" if not output and not json_output and fs_filter is not None: - return errno.EINVAL, "", "Invalid filesystem: " + fs_filter + return -errno.EINVAL, "", "Invalid filesystem: " + fs_filter standby_table = PrettyTable(["STANDBY MDS"], border=False) standby_table.left_padding_width = 0 @@ -320,7 +320,7 @@ class Module(MgrModule): if not found: msg = "Bucket '{0}' not found".format(bucket_filter) - return errno.ENOENT, msg, "" + return -errno.ENOENT, msg, "" # Build dict of OSD ID to stats osd_stats = dict([(o['osd'], o) for o in self.get("osd_stats")['osd_stats']])