From: Ramana Raja
Date: Thu, 26 Oct 2023 17:18:52 +0000 (-0400)
Subject: mgr/rbd_support: fix recursive locking on CreateSnapshotRequests lock
X-Git-Tag: v16.2.15~156^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ac3df15ccecf4b3cd61dc77644b6c8682f69f2e9;p=ceph.git

mgr/rbd_support: fix recursive locking on CreateSnapshotRequests lock

The MirrorSnapshotScheduleHandler's run thread issues asynchronous
create snapshot requests using a CreateSnapshotRequests instance. When
the thread invokes a CreateSnapshotRequests instance's get_ioctx(), the
instance's class variable lock is acquired. While that class variable
lock is held, garbage collection of a CreateSnapshotRequests instance
may run in the same thread. The thread then calls the
CreateSnapshotRequests __del__(), which tries to acquire the class
variable lock that the thread already holds.

Fix this recursive deadlock by converting the CreateSnapshotRequests
lock from a class variable to an instance variable. There is no need to
share the lock across CreateSnapshotRequests instances.

Also convert the MirrorSnapshotScheduleHandler, PerfHandler and
TrashPurgeScheduleHandler class variables that don't need to be shared
across instances to instance variables.
Fixes: https://tracker.ceph.com/issues/62994
Signed-off-by: Ramana Raja
Co-Authored-By: Ilya Dryomov
(cherry picked from commit 4452bc22d1c6c8499cf55d6e39090adf7ae1dcbf)

Conflicts:
	src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py
	src/pybind/mgr/rbd_support/perf.py
	src/pybind/mgr/rbd_support/trash_purge_schedule.py
- Above conflicts were due to commit e4a16e2 ("mgr/rbd_support: add type
  annotation") not in pacific
---

diff --git a/src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py b/src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py
index 42ecdbbe0cf3..69a8f677f862 100644
--- a/src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py
+++ b/src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py
@@ -25,10 +25,9 @@ def image_validator(image):
 
 class CreateSnapshotRequests:
 
-    lock = Lock()
-    condition = Condition(lock)
-
     def __init__(self, handler):
+        self.lock = Lock()
+        self.condition = Condition(self.lock)
         self.handler = handler
         self.rados = handler.module.rados
         self.log = handler.log
@@ -306,10 +305,9 @@ class MirrorSnapshotScheduleHandler:
     SCHEDULE_OID = "rbd_mirror_snapshot_schedule"
     REFRESH_DELAY_SECONDS = 60.0
 
-    lock = Lock()
-    condition = Condition(lock)
-
     def __init__(self, module):
+        self.lock = Lock()
+        self.condition = Condition(self.lock)
         self.module = module
         self.log = module.log
         self.last_refresh_images = datetime(1970, 1, 1)
diff --git a/src/pybind/mgr/rbd_support/perf.py b/src/pybind/mgr/rbd_support/perf.py
index 1a26119b1204..469309c60f1e 100644
--- a/src/pybind/mgr/rbd_support/perf.py
+++ b/src/pybind/mgr/rbd_support/perf.py
@@ -39,15 +39,6 @@ REPORT_MAX_RESULTS = 64
 
 
 class PerfHandler:
-    user_queries = {}
-    image_cache = {}
-
-    lock = Lock()
-    query_condition = Condition(lock)
-    refresh_condition = Condition(lock)
-
-    image_name_cache = {}
-    image_name_refresh_time = datetime.fromtimestamp(0)
 
     @classmethod
     def prepare_regex(cls, value):
@@ -85,6 +76,16 @@ class PerfHandler:
                 and (pool_key[0] == search_key[0] or not search_key[0]))
 
     def __init__(self, module):
+        self.user_queries = {}
+        self.image_cache = {}
+
+        self.lock = Lock()
+        self.query_condition = Condition(self.lock)
+        self.refresh_condition = Condition(self.lock)
+
+        self.image_name_cache = {}
+        self.image_name_refresh_time = datetime.fromtimestamp(0)
+
         self.module = module
         self.log = module.log
 
diff --git a/src/pybind/mgr/rbd_support/trash_purge_schedule.py b/src/pybind/mgr/rbd_support/trash_purge_schedule.py
index cbbdb1321b28..9b8e7b9e8c05 100644
--- a/src/pybind/mgr/rbd_support/trash_purge_schedule.py
+++ b/src/pybind/mgr/rbd_support/trash_purge_schedule.py
@@ -17,10 +17,9 @@ class TrashPurgeScheduleHandler:
     SCHEDULE_OID = "rbd_trash_purge_schedule"
     REFRESH_DELAY_SECONDS = 60.0
 
-    lock = Lock()
-    condition = Condition(lock)
-
     def __init__(self, module):
+        self.lock = Lock()
+        self.condition = Condition(self.lock)
         self.module = module
         self.log = module.log
         self.last_refresh_pools = datetime(1970, 1, 1)