mgr/volumes: cluster log when a purge thread bails out 29735/head
author    Venky Shankar <vshankar@redhat.com>
          Mon, 19 Aug 2019 07:45:25 +0000 (03:45 -0400)
committer Venky Shankar <vshankar@redhat.com>
          Wed, 11 Sep 2019 13:08:49 +0000 (09:08 -0400)
Fixes: http://tracker.ceph.com/issues/41218
Signed-off-by: Venky Shankar <vshankar@redhat.com>
src/pybind/mgr/volumes/fs/purge_queue.py
src/pybind/mgr/volumes/fs/volume.py

src/pybind/mgr/volumes/fs/purge_queue.py
index ac99f07418bb2102a2a4a52886d12ed35bfd2335..a76065cbab953da9f724c6ce538faf2df547f170 100644
@@ -19,7 +19,8 @@ class PurgeQueueBase(object):
     MAX_RETRIES_ON_EXCEPTION = 10
 
     class PurgeThread(threading.Thread):
-        def __init__(self, name, purge_fn):
+        def __init__(self, volume_client, name, purge_fn):
+            self.vc = volume_client
             self.purge_fn = purge_fn
             # event object to cancel ongoing purge
             self.cancel_event = threading.Event()
@@ -42,6 +43,7 @@ class PurgeQueueBase(object):
                         traceback.format_exception(exc_type, exc_value, exc_traceback))))
                     time.sleep(1)
             log.error("purge thread [{0}] reached exception limit, bailing out...".format(thread_name))
+            self.vc.cluster_log("purge thread {0} bailing out due to exception".format(thread_name))
 
         def cancel_job(self):
             self.cancel_event.set()
@@ -154,7 +156,7 @@ class ThreadPoolPurgeQueueMixin(PurgeQueueBase):
         self.threads = []
         for i in range(tp_size):
             self.threads.append(
-                PurgeQueueBase.PurgeThread(name="purgejob.{}".format(i), purge_fn=self.run))
+                PurgeQueueBase.PurgeThread(volume_client, name="purgejob.{}".format(i), purge_fn=self.run))
             self.threads[-1].start()
 
     def pick_purge_dir_from_volume(self):
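For context, the retry/bail-out path that the hunk above extends looks roughly like the sketch below. This is not the actual purge_queue.py: the per-attempt log wording and the purge_fn wiring are approximations, while the constant, the constructor signature, and the bail-out messages mirror the diff.

import logging
import threading
import time
import traceback

log = logging.getLogger(__name__)

# mirrors PurgeQueueBase.MAX_RETRIES_ON_EXCEPTION from the hunk above
MAX_RETRIES_ON_EXCEPTION = 10

class PurgeThread(threading.Thread):
    """Sketch of the retry/bail-out loop that this commit extends."""

    def __init__(self, volume_client, name, purge_fn):
        # the commit threads the volume client through so run() can reach
        # the cluster log when it gives up
        self.vc = volume_client
        self.purge_fn = purge_fn
        # event object to cancel an ongoing purge
        self.cancel_event = threading.Event()
        super().__init__(name=name)

    def run(self):
        retries = 0
        thread_name = self.name
        while retries < MAX_RETRIES_ON_EXCEPTION:
            try:
                self.purge_fn()
                return
            except Exception:
                retries += 1
                # the real code logs the attempt count and the formatted
                # traceback; the exact wording here is an approximation
                log.warning("purge thread [{0}] caught exception (attempt {1}/{2}):\n{3}".format(
                    thread_name, retries, MAX_RETRIES_ON_EXCEPTION, traceback.format_exc()))
                time.sleep(1)
        log.error("purge thread [{0}] reached exception limit, bailing out...".format(thread_name))
        # new in this commit: surface the bail-out in the cluster log as well
        self.vc.cluster_log("purge thread {0} bailing out due to exception".format(thread_name))

    def cancel_job(self):
        self.cancel_event.set()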
src/pybind/mgr/volumes/fs/volume.py
index ea411bce980a05f4e80b328f12531c3f46a043a5..9beb1206e8ae9d95d663d6f3eca74978ce6c251a 100644
@@ -175,6 +175,14 @@ class VolumeClient(object):
         for fs in fs_map['filesystems']:
             self.purge_queue.queue_purge_job(fs['mdsmap']['fs_name'])
 
+    def cluster_log(self, msg, lvl=None):
+        """
+        log to cluster log with default log level as WARN.
+        """
+        if not lvl:
+            lvl = self.mgr.CLUSTER_LOG_PRIO_WARN
+        self.mgr.cluster_log("cluster", lvl, msg)
+
     def gen_pool_names(self, volname):
         """
         return metadata and data pool name (from a filesystem/volume name) as a tuple
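
To see how the new helper plugs into the mgr module, here is a runnable sketch. The VolumeClient.cluster_log body is taken verbatim from the hunk above; StubMgr is a hypothetical stand-in for ceph's MgrModule (its CLUSTER_LOG_PRIO_WARN value is an assumption) so the example runs outside a ceph-mgr process.

class StubMgr(object):
    CLUSTER_LOG_PRIO_WARN = 3  # assumption: WARN priority constant on the mgr module

    def cluster_log(self, channel, priority, message):
        # a real mgr module forwards this to the cluster log
        # (visible via `ceph log last`); here we just print it
        print("[{0}] prio={1} {2}".format(channel, priority, message))


class VolumeClient(object):
    def __init__(self, mgr):
        self.mgr = mgr

    def cluster_log(self, msg, lvl=None):
        """
        log to cluster log with default log level as WARN.
        """
        if not lvl:
            lvl = self.mgr.CLUSTER_LOG_PRIO_WARN
        self.mgr.cluster_log("cluster", lvl, msg)


vc = VolumeClient(StubMgr())
# what a bailing purge thread would emit through the new helper
vc.cluster_log("purge thread purgejob.0 bailing out due to exception")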