From: Jason Dillaman Date: Thu, 7 May 2015 16:51:49 +0000 (-0400) Subject: librbd: retry lock requests periodically until acquired X-Git-Tag: v0.94.4~76^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=704c0e09479d435ecbb84693a786c60694732b19;p=ceph.git librbd: retry lock requests periodically until acquired If the exclusive lock owner acks the lock release request but crashes before it actually releases the lock, the requestor will wait forever. Therefore, after a certain timeout, retry the request again until it succeeds. Fixes: #11537 Backport: hammer Signed-off-by: Jason Dillaman (cherry picked from commit 37c74e6e5274208d1b6efaf315afec03ea7eaa82) --- diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 798b4320343ea..477e9cdb32bbf 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -602,6 +602,13 @@ void ImageWatcher::notify_request_lock() { lderr(m_image_ctx.cct) << "error requesting lock: " << cpp_strerror(r) << dendl; schedule_retry_aio_requests(true); + } else { + // lock owner acked -- but resend if we don't see them release the lock + int retry_timeout = m_image_ctx.cct->_conf->client_notify_timeout; + FunctionContext *ctx = new FunctionContext( + boost::bind(&ImageWatcher::notify_request_lock, this)); + m_task_finisher->add_event_after(TASK_CODE_REQUEST_LOCK, + retry_timeout, ctx); } } @@ -728,6 +735,7 @@ void ImageWatcher::handle_payload(const HeaderUpdatePayload &payload, void ImageWatcher::handle_payload(const AcquiredLockPayload &payload, bufferlist *out) { ldout(m_image_ctx.cct, 10) << "image exclusively locked announcement" << dendl; + m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK); if (payload.client_id.is_valid()) { Mutex::Locker l(m_owner_client_id_lock); if (payload.client_id == m_owner_client_id) { @@ -747,6 +755,7 @@ void ImageWatcher::handle_payload(const AcquiredLockPayload &payload, void ImageWatcher::handle_payload(const ReleasedLockPayload &payload, bufferlist *out) { ldout(m_image_ctx.cct, 10) << "exclusive lock released" << dendl; + m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK); if (payload.client_id.is_valid()) { Mutex::Locker l(m_owner_client_id_lock); if (payload.client_id != m_owner_client_id) {