git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
pybind/mgr/volumes: log mutex locks to help debug deadlocks 53916/head
author: Kotresh HR <khiremat@redhat.com>
Thu, 10 Aug 2023 10:02:23 +0000 (15:32 +0530)
committer: Kotresh HR <khiremat@redhat.com>
Tue, 10 Oct 2023 12:31:54 +0000 (18:01 +0530)
This patch adds logging for the mutex lock acquisitions that were
missed in commit cf2a1ad65120. Refer to [1] for more
details.

[1] https://tracker.ceph.com/issues/49605#note-5
Signed-off-by: Kotresh HR <khiremat@redhat.com>
(cherry picked from commit 5e689f8792da712d1fcc61d07b135267aed7e3a8)

src/pybind/mgr/volumes/fs/async_job.py

index dca95ff2286fa8807d8a9d4702a3a02ae0da70f5..0006724e0be0e83e13dd4de64944b818c24fbd81 100644 (file)
@@ -34,7 +34,7 @@ class JobThread(threading.Thread):
             vol_job = None
             try:
                 # fetch next job to execute
-                with self.async_job.lock:
+                with lock_timeout_log(self.async_job.lock):
                     while True:
                         if self.should_reconfigure_num_threads():
                             log.info("thread [{0}] terminating due to reconfigure".format(thread_name))
@@ -63,12 +63,12 @@ class JobThread(threading.Thread):
             finally:
                 # when done, unregister the job
                 if vol_job:
-                    with self.async_job.lock:
+                    with lock_timeout_log(self.async_job.lock):
                         self.async_job.unregister_async_job(vol_job[0], vol_job[1], thread_id)
                 time.sleep(1)
         log.error("thread [{0}] reached exception limit, bailing out...".format(thread_name))
         self.vc.cluster_log("thread {0} bailing out due to exception".format(thread_name))
-        with self.async_job.lock:
+        with lock_timeout_log(self.async_job.lock):
             self.async_job.threads.remove(self)
 
     def should_reconfigure_num_threads(self):
@@ -148,7 +148,7 @@ class AsyncJobs(threading.Thread):
     def shutdown(self):
         self.stopping.set()
         self.cancel_all_jobs()
-        with self.lock:
+        with lock_timeout_log(self.lock):
             self.cv.notifyAll()
         self.join()