From: Kotresh HR Date: Thu, 10 Aug 2023 10:02:23 +0000 (+0530) Subject: pybind/mgr/volumes: log mutex locks to help debug deadlocks X-Git-Tag: v16.2.15~139^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ea28a2a81e32e3f426a630f2e7c445523a5acb28;p=ceph.git pybind/mgr/volumes: log mutex locks to help debug deadlocks This patch adds logging for the mutex locks that were missed by commit cf2a1ad65120. Refer to [1] for more details. [1] https://tracker.ceph.com/issues/49605#note-5 Signed-off-by: Kotresh HR (cherry picked from commit 5e689f8792da712d1fcc61d07b135267aed7e3a8) --- diff --git a/src/pybind/mgr/volumes/fs/async_job.py b/src/pybind/mgr/volumes/fs/async_job.py index dca95ff2286fa..0006724e0be0e 100644 --- a/src/pybind/mgr/volumes/fs/async_job.py +++ b/src/pybind/mgr/volumes/fs/async_job.py @@ -34,7 +34,7 @@ class JobThread(threading.Thread): vol_job = None try: # fetch next job to execute - with self.async_job.lock: + with lock_timeout_log(self.async_job.lock): while True: if self.should_reconfigure_num_threads(): log.info("thread [{0}] terminating due to reconfigure".format(thread_name)) @@ -63,12 +63,12 @@ class JobThread(threading.Thread): finally: # when done, unregister the job if vol_job: - with self.async_job.lock: + with lock_timeout_log(self.async_job.lock): self.async_job.unregister_async_job(vol_job[0], vol_job[1], thread_id) time.sleep(1) log.error("thread [{0}] reached exception limit, bailing out...".format(thread_name)) self.vc.cluster_log("thread {0} bailing out due to exception".format(thread_name)) - with self.async_job.lock: + with lock_timeout_log(self.async_job.lock): self.async_job.threads.remove(self) def should_reconfigure_num_threads(self): @@ -148,7 +148,7 @@ class AsyncJobs(threading.Thread): def shutdown(self): self.stopping.set() self.cancel_all_jobs() - with self.lock: + with lock_timeout_log(self.lock): self.cv.notifyAll() self.join()