The crash and status mgr modules return positive errno values (e.g.
errno.EINVAL) on error. However, the ceph CLI (src/ceph.in) only
treats negative return codes as errors:
if ret < 0:
final_ret = -ret
A positive return code falls through and the CLI exits with 0, masking
the error. This causes commands like "ceph crash post" and "ceph crash
info <bad-id>" to return success (rc=0) despite failing, with the
error message only appearing on stderr.
This was observed in the wild and worked around in
a77b47eeeb57, which added a
stderr != "" check to ceph-crash's post_crash() as a mitigation.
Fix the root cause by negating the errno values in both modules,
consistent with the HandleCommandResult convention
(retval: "E.g. 0 or -errno.EINVAL") and the 154 other call sites
across mgr modules that already use negative errno.
Also add error-path tests for the crash module to cover the three
affected commands (info, post, archive).
See-also:
a77b47eeeb5770eeefcf4619ab2105ee7a6a003e
Fixes: https://tracker.ceph.com/issues/75937
Signed-off-by: Kefu Chai <k.chai@proxmox.com>
'crash', 'ls',
)
self.assertNotIn(self.oldest_crashid, retstr)
+
+ def test_info_nonexistent(self):
+ rc = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'crash', 'info', 'does-not-exist',
+ )
+ self.assertNotEqual(0, rc)
+
+ def test_post_malformed(self):
+ rc = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'crash', 'post', '-i', '-',
+ stdin='{"not_a_valid_crash": true}',
+ )
+ self.assertNotEqual(0, rc)
+
+ def test_archive_nonexistent(self):
+ rc = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'crash', 'archive', 'does-not-exist',
+ )
+ self.assertNotEqual(0, rc)
assert self.crashes is not None
crash = self.crashes.get(crashid)
if not crash:
- return errno.EINVAL, '', 'crash info: %s not found' % crashid
+ return -errno.EINVAL, '', 'crash info: %s not found' % crashid
val = json.dumps(crash, indent=4, sort_keys=True)
return 0, val, ''
try:
metadata = self.validate_crash_metadata(inbuf)
except Exception as e:
- return errno.EINVAL, '', 'malformed crash metadata: %s' % e
+ return -errno.EINVAL, '', 'malformed crash metadata: %s' % e
if 'backtrace' in metadata:
backtrace = cast(List[str], metadata.get('backtrace'))
assert_msg = cast(Optional[str], metadata.get('assert_msg'))
assert self.crashes is not None
crash = self.crashes.get(crashid)
if not crash:
- return errno.EINVAL, '', 'crash info: %s not found' % crashid
+ return -errno.EINVAL, '', 'crash info: %s not found' % crashid
if not crash.get('archived'):
crash['archived'] = str(datetime.datetime.utcnow())
self.crashes[crashid] = crash
output += "\n" + pools_table.get_string() + "\n"
if not output and not json_output and fs_filter is not None:
- return errno.EINVAL, "", "Invalid filesystem: " + fs_filter
+ return -errno.EINVAL, "", "Invalid filesystem: " + fs_filter
standby_table = PrettyTable(["STANDBY MDS"], border=False)
standby_table.left_padding_width = 0
if not found:
msg = "Bucket '{0}' not found".format(bucket_filter)
- return errno.ENOENT, msg, ""
+ return -errno.ENOENT, msg, ""
# Build dict of OSD ID to stats
osd_stats = dict([(o['osd'], o) for o in self.get("osd_stats")['osd_stats']])