From: Mykola Golub Date: Sun, 6 Sep 2020 12:48:53 +0000 (+0100) Subject: librbd: track complete async operation requests X-Git-Tag: v17.0.0~1146^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=07fbc4b71df450655dec046c10e919dbfde989ba;p=ceph.git librbd: track complete async operation requests to prevent duplicate maintenance operations due to RPC hiccups. Fixes: https://tracker.ceph.com/issues/46803 Signed-off-by: Mykola Golub --- diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 11291813f536f..575a5fe3229a1 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -167,7 +167,7 @@ void ImageWatcher::handle_async_complete(const AsyncRequestId &request, } std::unique_lock async_request_locker{m_async_request_lock}; - m_async_pending.erase(request); + mark_async_request_complete(request); m_async_op_tracker.finish_op(); } @@ -593,6 +593,36 @@ void ImageWatcher::notify_lock_owner(Payload *payload, Context *on_finish) { notify_lock_owner->send(); } +template +bool ImageWatcher::is_new_request(const AsyncRequestId &id) const { + ceph_assert(ceph_mutex_is_locked(m_async_request_lock)); + + return m_async_pending.count(id) == 0 && m_async_complete.count(id) == 0; +} + +template +bool ImageWatcher::mark_async_request_complete(const AsyncRequestId &id) { + ceph_assert(ceph_mutex_is_locked(m_async_request_lock)); + + bool found = m_async_pending.erase(id); + + auto now = ceph_clock_now(); + + auto it = m_async_complete_expiration.begin(); + while (it != m_async_complete_expiration.end() && it->first < now) { + m_async_complete.erase(it->second); + it = m_async_complete_expiration.erase(it); + } + + if (m_async_complete.insert(id).second) { + auto expiration_time = now; + expiration_time += 600; + m_async_complete_expiration.insert({expiration_time, id}); + } + + return found; +} + template Context *ImageWatcher::remove_async_request(const AsyncRequestId &id) { std::unique_lock async_request_locker{m_async_request_lock}; @@ -673,7 +703,7 @@ int ImageWatcher::prepare_async_request(const AsyncRequestId& async_request_i return -ERESTART; } else { std::unique_lock l{m_async_request_lock}; - if (m_async_pending.count(async_request_id) == 0) { + if (is_new_request(async_request_id)) { m_async_pending.insert(async_request_id); *new_request = true; *prog_ctx = new RemoteProgressContext(*this, async_request_id); @@ -692,7 +722,7 @@ Context *ImageWatcher::prepare_quiesce_request( auto timeout = 2 * watcher::Notifier::NOTIFY_TIMEOUT / 1000; - if (m_async_pending.count(request) != 0) { + if (!is_new_request(request)) { auto it = m_async_requests.find(request); if (it != m_async_requests.end()) { delete it->second.first; @@ -723,7 +753,7 @@ Context *ImageWatcher::prepare_quiesce_request( auto on_finish = new LambdaContext( [this, request](int r) { std::unique_lock async_request_locker{m_async_request_lock}; - m_async_pending.erase(request); + mark_async_request_complete(request); }); m_image_ctx.state->notify_unquiesce(on_finish); @@ -744,7 +774,7 @@ template Context *ImageWatcher::prepare_unquiesce_request(const AsyncRequestId &request) { { std::unique_lock async_request_locker{m_async_request_lock}; - bool found = m_async_pending.erase(request); + bool found = mark_async_request_complete(request); if (!found) { ldout(m_image_ctx.cct, 20) << this << " " << request << ": not found in pending" << dendl; diff --git a/src/librbd/ImageWatcher.h b/src/librbd/ImageWatcher.h index 098e1fd6725b4..ac0232d891ea5 100644 --- a/src/librbd/ImageWatcher.h +++ b/src/librbd/ImageWatcher.h @@ -169,6 +169,9 @@ private: ceph::shared_mutex m_async_request_lock; std::map m_async_requests; std::set m_async_pending; + std::set m_async_complete; + std::set> m_async_complete_expiration; ceph::mutex m_owner_client_id_lock; watch_notify::ClientId m_owner_client_id; @@ -188,6 +191,8 @@ private: void notify_lock_owner(watch_notify::Payload *payload, Context *on_finish); + bool is_new_request(const watch_notify::AsyncRequestId &id) const; + bool mark_async_request_complete(const watch_notify::AsyncRequestId &id); Context *remove_async_request(const watch_notify::AsyncRequestId &id); void schedule_async_request_timed_out(const watch_notify::AsyncRequestId &id); void async_request_timed_out(const watch_notify::AsyncRequestId &id);