From 808a1ce1f96f6dfd3472156ce5087372da4c1314 Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Wed, 19 Feb 2020 07:31:40 -0500 Subject: [PATCH] mgr/volumes: access volume in lockless mode when fetching async job Saw a deadlock when deleting a lot of subvolumes -- purge threads were stuck accessing the global lock for volume access. This can happen when there is a concurrent remove (which renames and signals the purge threads) and a purge thread is just about to scan the trash directory for entries. For the fix, purge threads fetch entries by accessing the volume in lockless mode. This is safe from a functionality point of view as the rename and directory scan are correctly handled by the filesystem. In the worst case, the purge thread would pick up the trash entry on the next scan, never leaving a stale trash entry. Signed-off-by: Venky Shankar --- src/pybind/mgr/volumes/fs/async_cloner.py | 2 +- src/pybind/mgr/volumes/fs/purge_queue.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pybind/mgr/volumes/fs/async_cloner.py b/src/pybind/mgr/volumes/fs/async_cloner.py index c67dbde1abe..debefd17f5a 100644 --- a/src/pybind/mgr/volumes/fs/async_cloner.py +++ b/src/pybind/mgr/volumes/fs/async_cloner.py @@ -24,7 +24,7 @@ def get_next_clone_entry(volume_client, volname, running_jobs): log.debug("fetching clone entry for volume '{0}'".format(volname)) try: - with open_volume(volume_client, volname) as fs_handle: + with open_volume_lockless(volume_client, volname) as fs_handle: try: with open_clone_index(fs_handle, volume_client.volspec) as clone_index: job = clone_index.get_oldest_clone_entry(running_jobs) diff --git a/src/pybind/mgr/volumes/fs/purge_queue.py b/src/pybind/mgr/volumes/fs/purge_queue.py index 922c8f00276..41052ba303d 100644 --- a/src/pybind/mgr/volumes/fs/purge_queue.py +++ b/src/pybind/mgr/volumes/fs/purge_queue.py @@ -13,7 +13,7 @@ def get_trash_entry_for_volume(volume_client, volname, running_jobs): log.debug("fetching trash entry for volume 
'{0}'".format(volname)) try: - with open_volume(volume_client, volname) as fs_handle: + with open_volume_lockless(volume_client, volname) as fs_handle: try: with open_trashcan(fs_handle, volume_client.volspec) as trashcan: path = trashcan.get_trash_entry(running_jobs) -- 2.39.5