git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
Objecter: pool_op callback may hang forever. 6588/head
author: xiexingguo <258156334@qq.com>
Thu, 29 Oct 2015 12:04:11 +0000 (20:04 +0800)
committer: Abhishek Lekshmanan <abhishek.lekshmanan@ril.com>
Sat, 14 Nov 2015 18:53:47 +0000 (00:23 +0530)
pool_op callback may hang forever due to osdmap update during reply handling.
Fixes: #13642
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
(cherry picked from commit 00c6fa9e31975a935ed2bb33a099e2b4f02ad7f2)

src/osdc/Objecter.cc

index 9f35ffdda753cddd495b262bce8bb2b3ade350c6..faddcd4fc1db34805b1a467b5046300abd51863d 100644 (file)
@@ -3626,12 +3626,24 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
     if (osdmap->get_epoch() < m->epoch) {
       rwlock.unlock();
       rwlock.get_write();
+      // recheck op existence since we have let go of rwlock
+      // (for promotion) above.
+      iter = pool_ops.find(tid);
+      if (iter == pool_ops.end())
+        goto done; // op is gone.
       if (osdmap->get_epoch() < m->epoch) {
         ldout(cct, 20) << "waiting for client to reach epoch " << m->epoch << " before calling back" << dendl;
         _wait_for_new_map(op->onfinish, m->epoch, m->replyCode);
+      } else {
+       // map epoch changed, probably because a MOSDMap message
+       // sneaked in. Do caller-specified callback now or else
+       // we lose it forever.
+       assert(op->onfinish);
+       op->onfinish->complete(m->replyCode);   
       }
     }
     else {
+      assert(op->onfinish);
       op->onfinish->complete(m->replyCode);
     }
     op->onfinish = NULL;
@@ -3646,6 +3658,8 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
   } else {
     ldout(cct, 10) << "unknown request " << tid << dendl;
   }
+
+done:
   rwlock.unlock();
 
   ldout(cct, 10) << "done" << dendl;