From eaa0ba00a3e8fa73bfb5d516a7743f6e3719cc01 Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Fri, 24 May 2024 10:03:11 +0000 Subject: [PATCH] rbd-wnbd: wait for the disk cleanup to complete The WNBD disk removal workflow is asynchronous, which is why we'll need to wait for the cleanup to complete when stopping the service. The "disconnect_all_mappings" function is moved to RbdMappingDispatcher::stop, allowing us to access the mapping list more easily and reject new mappings after a stop has been requested. While at it, we'll log service stop requests. Signed-off-by: Lucian Petrut (cherry picked from commit 9136cbdecb520def4fdfbbf696e1802037cac510) --- src/common/win32/service.cc | 4 + src/tools/rbd_wnbd/rbd_mapping.cc | 132 ++++++++++++++++++++++++++++++ src/tools/rbd_wnbd/rbd_mapping.h | 8 ++ src/tools/rbd_wnbd/rbd_wnbd.cc | 78 ++---------------- src/tools/rbd_wnbd/rbd_wnbd.h | 5 -- 5 files changed, 149 insertions(+), 78 deletions(-) diff --git a/src/common/win32/service.cc b/src/common/win32/service.cc index 7cf7620bf87bc..5e86f1af90da0 100644 --- a/src/common/win32/service.cc +++ b/src/common/win32/service.cc @@ -86,6 +86,8 @@ void ServiceBase::shutdown(bool ignore_errors) DWORD original_state = status.dwCurrentState; set_status(SERVICE_STOP_PENDING); + dout(0) << "Shutdown requested." << dendl; + int err = shutdown_hook(); if (err) { derr << "Shutdown service hook failed. Error code: " << err << dendl; @@ -108,6 +110,8 @@ void ServiceBase::stop() DWORD original_state = status.dwCurrentState; set_status(SERVICE_STOP_PENDING); + dout(0) << "Service stop requested." << dendl; + int err = stop_hook(); if (err) { derr << "Service stop hook failed. Error code: " << err << dendl; diff --git a/src/tools/rbd_wnbd/rbd_mapping.cc b/src/tools/rbd_wnbd/rbd_mapping.cc index 35e3b7718c885..287a529ae3731 100644 --- a/src/tools/rbd_wnbd/rbd_mapping.cc +++ b/src/tools/rbd_wnbd/rbd_mapping.cc @@ -232,6 +232,12 @@ int RbdMappingDispatcher::create(Config& cfg) return -EEXIST; } + if (stop_requested) { + derr << "service stop requested, refusing to create new mapping." + << dendl; + return -ESHUTDOWN; + } + auto rbd_mapping = std::make_shared( cfg, client_cache, std::bind( @@ -282,3 +288,129 @@ void RbdMappingDispatcher::disconnect_cbk(std::string devpath, int ret) mappings.erase(devpath); } } + +int RbdMappingDispatcher::stop( + bool hard_disconnect, + int soft_disconnect_timeout, + int worker_count) +{ + stop_requested = true; + + // Although not generally recommended, soft_disconnect_timeout can be 0, + // which means infinite timeout. + ceph_assert(soft_disconnect_timeout >= 0); + ceph_assert(worker_count > 0); + + std::atomic err = 0; + + boost::asio::thread_pool pool(worker_count); + LARGE_INTEGER start_t, counter_freq; + QueryPerformanceFrequency(&counter_freq); + QueryPerformanceCounter(&start_t); + + // We're initiating the disk removal through libwnbd, which uses PNP + // to notify the storage stack that the disk is going to be removed + // and waits for pending operations to complete. + // + // Once ready, the WNBD driver notifies rbd-wnbd that the disk has been + // disconnected. + auto mapped_devpaths = get_mapped_devpaths(); + for (const auto& devpath: mapped_devpaths) { + boost::asio::post(pool, + [devpath, start_t, counter_freq, soft_disconnect_timeout, + hard_disconnect, &err]() + { + LARGE_INTEGER curr_t, elapsed_ms; + QueryPerformanceCounter(&curr_t); + elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart; + elapsed_ms.QuadPart *= 1000; + elapsed_ms.QuadPart /= counter_freq.QuadPart; + + int64_t time_left_ms = std::max( + (int64_t)0, + soft_disconnect_timeout * 1000 - elapsed_ms.QuadPart); + + dout(1) << "Removing mapping: " << devpath + << ". Timeout: " << time_left_ms + << "ms. Hard disconnect: " << hard_disconnect + << dendl; + + WNBD_REMOVE_OPTIONS remove_options = {0}; + remove_options.Flags.HardRemove = hard_disconnect || !time_left_ms; + remove_options.Flags.HardRemoveFallback = true; + remove_options.SoftRemoveTimeoutMs = time_left_ms; + remove_options.SoftRemoveRetryIntervalMs = SOFT_REMOVE_RETRY_INTERVAL * 1000; + + // This is asynchronous, it may take a few seconds for the disk to be + // removed. We'll perform the wait outside the loop to speed up the + // process. + int r = WnbdRemoveEx(devpath.c_str(), &remove_options); + if (r && r != ERROR_FILE_NOT_FOUND) { + err = -EINVAL; + derr << "Could not initiate mapping removal: " << devpath + << ". Error: " << win32_strerror(r) << dendl; + } else { + dout(1) << "Successfully initiated mapping removal: " << devpath << dendl; + } + }); + } + pool.join(); + + if (err) { + derr << "Could not initiate removal of all mappings. Error: " << err << dendl; + return err; + } + + // Wait for the cleanup to complete on the rbd-wnbd service side. + return wait_for_mappings_removal(10000); +} + +std::vector RbdMappingDispatcher::get_mapped_devpaths() { + std::vector out; + std::unique_lock l{map_mutex}; + + for (auto it = mappings.begin(); it != mappings.end(); it++) { + out.push_back(it->first); + } + + return out; +} + +int RbdMappingDispatcher::get_mappings_count() { + std::unique_lock l{map_mutex}; + return mappings.size(); +} + +int RbdMappingDispatcher::wait_for_mappings_removal(int timeout_ms) { + LARGE_INTEGER start_t, counter_freq; + QueryPerformanceFrequency(&counter_freq); + QueryPerformanceCounter(&start_t); + + int time_left_ms = timeout_ms; + int mappings_count = get_mappings_count(); + + while (mappings_count && time_left_ms > 0) { + dout(1) << "Waiting for " << mappings_count << " mapping(s) to be removed. " + << "Time left: " << time_left_ms << "ms." << dendl; + Sleep(2000); + + LARGE_INTEGER curr_t, elapsed_ms; + QueryPerformanceCounter(&curr_t); + elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart; + elapsed_ms.QuadPart *= 1000; + elapsed_ms.QuadPart /= counter_freq.QuadPart; + + time_left_ms = timeout_ms - elapsed_ms.QuadPart; + mappings_count = get_mappings_count(); + } + + if (mappings_count) { + derr << "Timed out waiting for disk mappings to be removed. " + << "Remaining mapping(s): " << mappings_count << dendl; + return -ETIMEDOUT; + } else { + dout(1) << "Successfully removed all mappings." << dendl; + } + + return 0; +} diff --git a/src/tools/rbd_wnbd/rbd_mapping.h b/src/tools/rbd_wnbd/rbd_mapping.h index 1255880cb7937..de3e55eedc751 100644 --- a/src/tools/rbd_wnbd/rbd_mapping.h +++ b/src/tools/rbd_wnbd/rbd_mapping.h @@ -107,7 +107,12 @@ private: std::map> mappings; ceph::mutex map_mutex = ceph::make_mutex("RbdMappingDispatcher::MapMutex"); + std::atomic stop_requested = false; + void disconnect_cbk(std::string devpath, int ret); + int wait_for_mappings_removal(int timeout_ms); + std::vector get_mapped_devpaths(); + int get_mappings_count(); public: RbdMappingDispatcher(RadosClientCache& _client_cache) @@ -116,4 +121,7 @@ public: int create(Config& cfg); std::shared_ptr get_mapping(std::string& devpath); + int stop(bool hard_disconnect, + int soft_disconnect_timeout, + int worker_count); }; diff --git a/src/tools/rbd_wnbd/rbd_wnbd.cc b/src/tools/rbd_wnbd/rbd_wnbd.cc index 57691bb723600..2ebf2c04b24e8 100644 --- a/src/tools/rbd_wnbd/rbd_wnbd.cc +++ b/src/tools/rbd_wnbd/rbd_wnbd.cc @@ -482,74 +482,6 @@ int restart_registered_mappings( return err; } -int disconnect_all_mappings( - bool unregister, - bool hard_disconnect, - int soft_disconnect_timeout, - int worker_count) -{ - // Although not generally recommended, soft_disconnect_timeout can be 0, - // which means infinite timeout. - ceph_assert(soft_disconnect_timeout >= 0); - ceph_assert(worker_count > 0); - int64_t timeout_ms = soft_disconnect_timeout * 1000; - - Config cfg; - WNBDActiveDiskIterator iterator; - int r; - std::atomic err = 0; - - boost::asio::thread_pool pool(worker_count); - LARGE_INTEGER start_t, counter_freq; - QueryPerformanceFrequency(&counter_freq); - QueryPerformanceCounter(&start_t); - while (iterator.get(&cfg)) { - boost::asio::post(pool, - [cfg, start_t, counter_freq, timeout_ms, - hard_disconnect, unregister, &err]() mutable - { - LARGE_INTEGER curr_t, elapsed_ms; - QueryPerformanceCounter(&curr_t); - elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart; - elapsed_ms.QuadPart *= 1000; - elapsed_ms.QuadPart /= counter_freq.QuadPart; - - int64_t time_left_ms = max((int64_t)0, timeout_ms - elapsed_ms.QuadPart); - - cfg.hard_disconnect = hard_disconnect || !time_left_ms; - cfg.hard_disconnect_fallback = true; - cfg.soft_disconnect_timeout = time_left_ms / 1000; - - dout(1) << "Removing mapping: " << cfg.devpath - << ". Timeout: " << cfg.soft_disconnect_timeout - << "s. Hard disconnect: " << cfg.hard_disconnect - << dendl; - - int r = do_unmap(&cfg, unregister); - if (r) { - err = r; - derr << "Could not remove mapping: " << cfg.devpath - << ". Error: " << r << dendl; - } else { - dout(1) << "Successfully removed mapping: " << cfg.devpath << dendl; - } - }); - } - pool.join(); - - r = iterator.get_error(); - if (r == -ENOENT) { - dout(0) << __func__ << ": wnbd adapter unavailable, " - << "assuming that no wnbd mappings exist." << dendl; - err = 0; - } else if (r) { - derr << "Could not fetch all mappings. Error: " << r << dendl; - err = r; - } - - return err; -} - class RBDService : public ServiceBase { private: bool hard_disconnect; @@ -565,7 +497,7 @@ class RBDService : public ServiceBase { ceph::mutex start_hook_lock = ceph::make_mutex("RBDService::StartLocker"); ceph::mutex stop_hook_lock = ceph::make_mutex("RBDService::ShutdownLocker"); bool started = false; - std::atomic stop_requsted = false; + std::atomic stop_requested = false; public: RBDService(bool _hard_disconnect, @@ -743,7 +675,7 @@ exit: dout(0) << "monitoring wnbd adapter state changes" << dendl; // The event watcher will wait at most WMI_EVENT_TIMEOUT (2s) // and exit the loop if the service is being stopped. - while (!stop_requsted) { + while (!stop_requested) { IWbemClassObject* object; ULONG returned = 0; @@ -826,10 +758,10 @@ exit: int stop_hook() override { std::unique_lock l{stop_hook_lock}; - stop_requsted = true; + stop_requested = true; - int r = disconnect_all_mappings( - false, hard_disconnect, soft_disconnect_timeout, thread_count); + int r = mapping_dispatcher.stop( + hard_disconnect, soft_disconnect_timeout, thread_count); if (adapter_monitor_thread.joinable()) { dout(10) << "waiting for wnbd event monitor thread" << dendl; diff --git a/src/tools/rbd_wnbd/rbd_wnbd.h b/src/tools/rbd_wnbd/rbd_wnbd.h index 6ec4851e8a7c0..efe80dd590182 100644 --- a/src/tools/rbd_wnbd/rbd_wnbd.h +++ b/src/tools/rbd_wnbd/rbd_wnbd.h @@ -55,11 +55,6 @@ typedef struct { bool is_process_running(DWORD pid); void unmap_at_exit(); -int disconnect_all_mappings( - bool unregister, - bool hard_disconnect, - int soft_disconnect_timeout, - int worker_count); int restart_registered_mappings( int worker_count, int total_timeout, int image_map_timeout); int map_device_using_same_process(std::string command_line); -- 2.39.5