From 704c0e09479d435ecbb84693a786c60694732b19 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 7 May 2015 12:51:49 -0400 Subject: [PATCH] librbd: retry lock requests periodically until acquired If the exclusive lock owner acks the lock release request but crashes before it actually releases the lock, the requestor will wait forever. Therefore, after a certain timeout, retry the request until it succeeds. Fixes: #11537 Backport: hammer Signed-off-by: Jason Dillaman (cherry picked from commit 37c74e6e5274208d1b6efaf315afec03ea7eaa82) --- src/librbd/ImageWatcher.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 798b4320343ea..477e9cdb32bbf 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -602,6 +602,13 @@ void ImageWatcher::notify_request_lock() { lderr(m_image_ctx.cct) << "error requesting lock: " << cpp_strerror(r) << dendl; schedule_retry_aio_requests(true); + } else { + // lock owner acked -- but resend if we don't see them release the lock + int retry_timeout = m_image_ctx.cct->_conf->client_notify_timeout; + FunctionContext *ctx = new FunctionContext( + boost::bind(&ImageWatcher::notify_request_lock, this)); + m_task_finisher->add_event_after(TASK_CODE_REQUEST_LOCK, + retry_timeout, ctx); } } @@ -728,6 +735,7 @@ void ImageWatcher::handle_payload(const HeaderUpdatePayload &payload, void ImageWatcher::handle_payload(const AcquiredLockPayload &payload, bufferlist *out) { ldout(m_image_ctx.cct, 10) << "image exclusively locked announcement" << dendl; + m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK); if (payload.client_id.is_valid()) { Mutex::Locker l(m_owner_client_id_lock); if (payload.client_id == m_owner_client_id) { @@ -747,6 +755,7 @@ void ImageWatcher::handle_payload(const AcquiredLockPayload &payload, void ImageWatcher::handle_payload(const ReleasedLockPayload &payload, bufferlist *out) { ldout(m_image_ctx.cct, 10) << "exclusive lock released" 
<< dendl; + m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK); if (payload.client_id.is_valid()) { Mutex::Locker l(m_owner_client_id_lock); if (payload.client_id != m_owner_client_id) { -- 2.39.5