From: Ramana Raja Date: Thu, 26 Oct 2023 17:18:52 +0000 (-0400) Subject: mgr/rbd_support: fix recursive locking on CreateSnapshotRequests lock X-Git-Tag: v18.2.1~49^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=734a26af5db695c2fe28fe572719484284d3ebb3;p=ceph-ci.git mgr/rbd_support: fix recursive locking on CreateSnapshotRequests lock The MirrorSnapshotScheduleHandler's run thread issues asynchronous create snapshot requests using a CreateSnapshotRequests instance. When the thread invokes a CreateSnapshotRequests instance's get_ioctx(), the instance's class variable lock is acquired. With the class variable lock held, the garbage collection of a CreateSnapshotRequests instance may race in the thread. The thread would then call CreateSnapshotRequests __del__() that tries to acquire the class variable lock that the thread already holds. Fix this recursive deadlock by converting the CreateSnapshotRequests lock from a class variable to an instance variable. There is no need to share the lock across CreateSnapshotRequests instances. Also convert MirrorSnapshotScheduleHandler, PerfHandler and TrashPurgeScheduleHandler class variables to instance variables that don't need to be shared across the instances. Fixes: https://tracker.ceph.com/issues/62994 Signed-off-by: Ramana Raja Co-Authored-By: Ilya Dryomov (cherry picked from commit 4452bc22d1c6c8499cf55d6e39090adf7ae1dcbf) --- diff --git a/src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py b/src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py index 120b59318ba..0ddc2e6e734 100644 --- a/src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py +++ b/src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py @@ -33,10 +33,9 @@ class ImageSpec(NamedTuple): class CreateSnapshotRequests: - lock = Lock() - condition = Condition(lock) - def __init__(self, handler: Any) -> None: + self.lock = Lock() + self.condition = Condition(self.lock) self.handler = handler self.rados = handler.module.rados self.log = handler.log @@ -331,10 +330,9 @@ class MirrorSnapshotScheduleHandler: SCHEDULE_OID = "rbd_mirror_snapshot_schedule" REFRESH_DELAY_SECONDS = 60.0 - lock = Lock() - condition = Condition(lock) - def __init__(self, module: Any) -> None: + self.lock = Lock() + self.condition = Condition(self.lock) self.module = module self.log = module.log self.last_refresh_images = datetime(1970, 1, 1) diff --git a/src/pybind/mgr/rbd_support/perf.py b/src/pybind/mgr/rbd_support/perf.py index 68cbbd3b5f4..20815721de5 100644 --- a/src/pybind/mgr/rbd_support/perf.py +++ b/src/pybind/mgr/rbd_support/perf.py @@ -65,15 +65,6 @@ ExtractDataFuncT = Callable[[int, Optional[RawImageCounterT], SumImageCounterT], class PerfHandler: - user_queries: Dict[PoolKeyT, Dict[str, Any]] = {} - image_cache: Dict[str, str] = {} - - lock = Lock() - query_condition = Condition(lock) - refresh_condition = Condition(lock) - - image_name_cache: Dict[Tuple[int, str], Dict[str, str]] = {} - image_name_refresh_time = datetime.fromtimestamp(0) @classmethod def prepare_regex(cls, value: Any) -> str: @@ -114,6 +105,16 @@ class PerfHandler: and (pool_key[0] == search_key[0] or not search_key[0])) def __init__(self, module: Any) -> None: + self.user_queries: Dict[PoolKeyT, Dict[str, Any]] = {} + self.image_cache: Dict[str, str] = {} + + self.lock = Lock() + self.query_condition = Condition(self.lock) + self.refresh_condition = Condition(self.lock) + + self.image_name_cache: Dict[Tuple[int, str], Dict[str, str]] = {} + self.image_name_refresh_time = datetime.fromtimestamp(0) + self.module = module self.log = module.log diff --git a/src/pybind/mgr/rbd_support/trash_purge_schedule.py b/src/pybind/mgr/rbd_support/trash_purge_schedule.py index b2f7b1614f1..abc50ec394f 100644 --- a/src/pybind/mgr/rbd_support/trash_purge_schedule.py +++ b/src/pybind/mgr/rbd_support/trash_purge_schedule.py @@ -16,10 +16,9 @@ class TrashPurgeScheduleHandler: SCHEDULE_OID = "rbd_trash_purge_schedule" REFRESH_DELAY_SECONDS = 60.0 - lock = Lock() - condition = Condition(lock) - def __init__(self, module: Any) -> None: + self.lock = Lock() + self.condition = Condition(self.lock) self.module = module self.log = module.log self.last_refresh_pools = datetime(1970, 1, 1)