From: Jason Dillaman
Date: Thu, 7 May 2015 16:51:49 +0000 (-0400)
Subject: librbd: retry lock requests periodically until acquired
X-Git-Tag: v9.0.2~15^2~6
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=37c74e6e5274208d1b6efaf315afec03ea7eaa82;p=ceph.git

librbd: retry lock requests periodically until acquired

If the exclusive lock owner acks the lock release request but crashes
before it actually releases the lock, the requestor will wait forever.
Therefore, after a certain timeout, retry the request again until it
succeeds.

Fixes: #11537
Backport: hammer
Signed-off-by: Jason Dillaman
---

diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc
index 82e45db16937..6cfa6d7c5626 100644
--- a/src/librbd/ImageWatcher.cc
+++ b/src/librbd/ImageWatcher.cc
@@ -626,6 +626,13 @@ void ImageWatcher::notify_request_lock() {
     lderr(m_image_ctx.cct) << "error requesting lock: " << cpp_strerror(r)
                            << dendl;
     schedule_retry_aio_requests(true);
+  } else {
+    // lock owner acked -- but resend if we don't see them release the lock
+    int retry_timeout = m_image_ctx.cct->_conf->client_notify_timeout;
+    FunctionContext *ctx = new FunctionContext(
+      boost::bind(&ImageWatcher::notify_request_lock, this));
+    m_task_finisher->add_event_after(TASK_CODE_REQUEST_LOCK,
+                                     retry_timeout, ctx);
   }
 }
 
@@ -777,6 +784,7 @@ void ImageWatcher::handle_payload(const HeaderUpdatePayload &payload,
 void ImageWatcher::handle_payload(const AcquiredLockPayload &payload,
                                   bufferlist *out) {
   ldout(m_image_ctx.cct, 10) << "image exclusively locked announcement" << dendl;
+  m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK);
   if (payload.client_id.is_valid()) {
     Mutex::Locker l(m_owner_client_id_lock);
     if (payload.client_id == m_owner_client_id) {
@@ -796,6 +804,7 @@ void ImageWatcher::handle_payload(const AcquiredLockPayload &payload,
 void ImageWatcher::handle_payload(const ReleasedLockPayload &payload,
                                   bufferlist *out) {
   ldout(m_image_ctx.cct, 10) << "exclusive lock released" << dendl;
+  m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK);
   if (payload.client_id.is_valid()) {
     Mutex::Locker l(m_owner_client_id_lock);
     if (payload.client_id != m_owner_client_id) {