From 8624259a32ce64c3aff2bfcdca1852015a43d0cb Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Thu, 21 Dec 2023 10:39:03 -0500
Subject: [PATCH] qa: test devicehealth legacy load of deleted snap obj

Failure without fix looks like:

    2023-12-21T16:05:55.737+0000 7fbe585b0700 0 [devicehealth DEBUG root] loading object ABC_DEADB33F_FA
    2023-12-21T16:05:55.737+0000 7fbe585b0700 -1 log_channel(cluster) log [ERR] : Unhandled exception from module 'devicehealth' while running on mgr.x: [errno 2] RADOS object not found (Failed to operate read op for oid ABC_DEADB33F_FA)
    2023-12-21T16:05:55.737+0000 7fbe585b0700 -1 devicehealth.serve:
    2023-12-21T16:05:55.737+0000 7fbe585b0700 -1 Traceback (most recent call last):
      File "/home/pdonnell/ceph/src/pybind/mgr/devicehealth/module.py", line 394, in serve
        self._do_serve()
      File "/home/pdonnell/ceph/src/pybind/mgr/mgr_module.py", line 524, in check
        return func(self, *args, **kwargs)
      File "/home/pdonnell/ceph/src/pybind/mgr/devicehealth/module.py", line 354, in _do_serve
        finished_loading_legacy = self.check_legacy_pool()
      File "/home/pdonnell/ceph/src/pybind/mgr/devicehealth/module.py", line 326, in check_legacy_pool
        if self._load_legacy_object(ioctx, obj.key):
      File "/home/pdonnell/ceph/src/pybind/mgr/devicehealth/module.py", line 300, in _load_legacy_object
        ioctx.operate_read_op(op, oid)
      File "rados.pyx", line 3723, in rados.Ioctx.operate_read_op
    rados.ObjectNotFound: [errno 2] RADOS object not found (Failed to operate read op for oid ABC_DEADB33F_FA)

Credit to Greg Farnum for postulating the cause.

Signed-off-by: Patrick Donnelly
(cherry picked from commit aa30adbaa7616c8e24b3ccaad9dbcda7c0c663fc)
---
 qa/tasks/mgr/test_devicehealth.py | 33 +++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 qa/tasks/mgr/test_devicehealth.py

diff --git a/qa/tasks/mgr/test_devicehealth.py b/qa/tasks/mgr/test_devicehealth.py
new file mode 100644
index 0000000000000..d3aa33fc0951e
--- /dev/null
+++ b/qa/tasks/mgr/test_devicehealth.py
@@ -0,0 +1,33 @@
+from io import StringIO
+import logging
+
+from .mgr_test_case import MgrTestCase
+
+log = logging.getLogger(__name__)
+
+
+class TestDeviceHealth(MgrTestCase):
+    MGRS_REQUIRED = 1
+
+    def setUp(self):
+        super(TestDeviceHealth, self).setUp()
+        self.setup_mgrs()
+
+    def tearDown(self):  # sets mgr down, allows pool deletion, removes the .mgr pool, then clears the mgr down flag
+        self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'true')
+        self.mgr_cluster.mon_manager.raw_cluster_cmd('config', 'set', 'mon', 'mon_allow_pool_delete', 'true')
+        self.mgr_cluster.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', '.mgr', '.mgr', '--yes-i-really-really-mean-it-not-faking')
+        self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'false')
+
+    def test_legacy_upgrade_snap(self):
+        """Put an object in .mgr, snapshot the pool, remove the object and fail the mgr; the cluster log
+        is asserted not to contain the devicehealth unhandled-exception message while waiting for an active mgr."""
+
+        o = "ABC_DEADB33F_FA"  # same oid as in the pre-fix failure log quoted in the commit message
+        self.mon_manager.do_rados(["put", o, "-"], pool=".mgr", stdin=StringIO("junk"))
+        self.mon_manager.do_rados(["mksnap", "foo"], pool=".mgr")
+        self.mon_manager.do_rados(["rm", o], pool=".mgr")
+        self.mgr_cluster.mgr_fail()
+
+        with self.assert_cluster_log("Unhandled exception from module 'devicehealth' while running", present=False):
+            self.wait_until_true(lambda: self.mgr_cluster.get_active_id() is not None, timeout=60)
-- 
2.39.5