From 37c74e6e5274208d1b6efaf315afec03ea7eaa82 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 7 May 2015 12:51:49 -0400 Subject: [PATCH] librbd: retry lock requests periodically until acquired If the exclusive lock owner acks the lock release request but crashes before it actually releases the lock, the requestor will wait forever. Therefore, after a certain timeout, retry the request again until it succeeds. Fixes: #11537 Backport: hammer Signed-off-by: Jason Dillaman --- src/librbd/ImageWatcher.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 82e45db169378..6cfa6d7c5626c 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -626,6 +626,13 @@ void ImageWatcher::notify_request_lock() { lderr(m_image_ctx.cct) << "error requesting lock: " << cpp_strerror(r) << dendl; schedule_retry_aio_requests(true); + } else { + // lock owner acked -- but resend if we don't see them release the lock + int retry_timeout = m_image_ctx.cct->_conf->client_notify_timeout; + FunctionContext *ctx = new FunctionContext( + boost::bind(&ImageWatcher::notify_request_lock, this)); + m_task_finisher->add_event_after(TASK_CODE_REQUEST_LOCK, + retry_timeout, ctx); } } @@ -777,6 +784,7 @@ void ImageWatcher::handle_payload(const HeaderUpdatePayload &payload, void ImageWatcher::handle_payload(const AcquiredLockPayload &payload, bufferlist *out) { ldout(m_image_ctx.cct, 10) << "image exclusively locked announcement" << dendl; + m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK); if (payload.client_id.is_valid()) { Mutex::Locker l(m_owner_client_id_lock); if (payload.client_id == m_owner_client_id) { @@ -796,6 +804,7 @@ void ImageWatcher::handle_payload(const AcquiredLockPayload &payload, void ImageWatcher::handle_payload(const ReleasedLockPayload &payload, bufferlist *out) { ldout(m_image_ctx.cct, 10) << "exclusive lock released" << dendl; + m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK); if (payload.client_id.is_valid()) { Mutex::Locker l(m_owner_client_id_lock); if (payload.client_id != m_owner_client_id) { -- 2.39.5