]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: retry lock requests periodically until acquired
author: Jason Dillaman <dillaman@redhat.com>
Thu, 7 May 2015 16:51:49 +0000 (12:51 -0400)
committer: Jason Dillaman <dillaman@redhat.com>
Fri, 5 Jun 2015 16:29:32 +0000 (12:29 -0400)
If the exclusive lock owner acks the lock release request but crashes
before it actually releases the lock, the requestor will wait forever.
Therefore, once a timeout expires without the lock being released,
resend the request, and keep retrying until it succeeds.

Fixes: #11537
Backport: hammer
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/librbd/ImageWatcher.cc

index 82e45db1693781dab3b37e9b9aab7dc91d8a876b..6cfa6d7c5626ca675652d98b1e280b3174679bfe 100644 (file)
@@ -626,6 +626,13 @@ void ImageWatcher::notify_request_lock() {
     lderr(m_image_ctx.cct) << "error requesting lock: " << cpp_strerror(r)
                           << dendl;
     schedule_retry_aio_requests(true);
+  } else {
+    // lock owner acked -- but resend if we don't see them release the lock
+    int retry_timeout = m_image_ctx.cct->_conf->client_notify_timeout;
+    FunctionContext *ctx = new FunctionContext(
+      boost::bind(&ImageWatcher::notify_request_lock, this));
+    m_task_finisher->add_event_after(TASK_CODE_REQUEST_LOCK,
+                                     retry_timeout, ctx);
   }
 }
 
@@ -777,6 +784,7 @@ void ImageWatcher::handle_payload(const HeaderUpdatePayload &payload,
 void ImageWatcher::handle_payload(const AcquiredLockPayload &payload,
                                   bufferlist *out) {
   ldout(m_image_ctx.cct, 10) << "image exclusively locked announcement" << dendl;
+  m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK);
   if (payload.client_id.is_valid()) {
     Mutex::Locker l(m_owner_client_id_lock);
     if (payload.client_id == m_owner_client_id) {
@@ -796,6 +804,7 @@ void ImageWatcher::handle_payload(const AcquiredLockPayload &payload,
 void ImageWatcher::handle_payload(const ReleasedLockPayload &payload,
                                   bufferlist *out) {
   ldout(m_image_ctx.cct, 10) << "exclusive lock released" << dendl;
+  m_task_finisher->cancel(TASK_CODE_REQUEST_LOCK);
   if (payload.client_id.is_valid()) {
     Mutex::Locker l(m_owner_client_id_lock);
     if (payload.client_id != m_owner_client_id) {