git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/volumes: access volume in lockless mode when fetching async job
author Venky Shankar <vshankar@redhat.com>
Wed, 19 Feb 2020 12:31:40 +0000 (07:31 -0500)
committer Venky Shankar <vshankar@redhat.com>
Mon, 24 Feb 2020 09:27:02 +0000 (04:27 -0500)
Saw a deadlock when deleting a lot of subvolumes -- purge threads were
stuck acquiring the global lock for volume access. This can happen
when there is a concurrent remove (which renames and signals the
purge threads) and a purge thread is just about to scan the trash
directory for entries.

For the fix, purge threads fetch entries by accessing the volume
in lockless mode. This is safe from a functionality point of view, as
the rename and the directory scan are correctly handled by the filesystem.
In the worst case, the purge thread picks up the trash entry on the next
scan, so a stale trash entry is never left behind.

Signed-off-by: Venky Shankar <vshankar@redhat.com>
src/pybind/mgr/volumes/fs/async_cloner.py
src/pybind/mgr/volumes/fs/purge_queue.py

index c67dbde1abef33d858a253e27b7fce9d88333f89..debefd17f5a2f09137c663984940d5cc8135d654 100644 (file)
@@ -24,7 +24,7 @@ def get_next_clone_entry(volume_client, volname, running_jobs):
     log.debug("fetching clone entry for volume '{0}'".format(volname))
 
     try:
-        with open_volume(volume_client, volname) as fs_handle:
+        with open_volume_lockless(volume_client, volname) as fs_handle:
             try:
                 with open_clone_index(fs_handle, volume_client.volspec) as clone_index:
                     job = clone_index.get_oldest_clone_entry(running_jobs)
index 922c8f0027688a323f60a3323baaa72e12c6999f..41052ba303d658999b4ce67b85d2d050b7236eb2 100644 (file)
@@ -13,7 +13,7 @@ def get_trash_entry_for_volume(volume_client, volname, running_jobs):
     log.debug("fetching trash entry for volume '{0}'".format(volname))
 
     try:
-        with open_volume(volume_client, volname) as fs_handle:
+        with open_volume_lockless(volume_client, volname) as fs_handle:
             try:
                 with open_trashcan(fs_handle, volume_client.volspec) as trashcan:
                     path = trashcan.get_trash_entry(running_jobs)