git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
Objecter: pool_op callback may hang forever. 6426/head
author xiexingguo <258156334@qq.com>
Thu, 29 Oct 2015 12:04:11 +0000 (20:04 +0800)
committer xiexingguo <258156334@qq.com>
Wed, 4 Nov 2015 12:02:14 +0000 (20:02 +0800)
pool_op callback may hang forever due to osdmap update during reply handling.
Fixes: #13642
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
src/osdc/Objecter.cc

index 87f7257ae6c1cff561fc8b242272d9e367ad3c7c..66db3115ab230bc6f83d3384fee4e531bdb251b6 100644 (file)
@@ -3769,12 +3769,24 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
     if (osdmap->get_epoch() < m->epoch) {
       rwlock.unlock();
       rwlock.get_write();
+      // recheck op existence since we have let go of rwlock
+      // (for promotion) above.
+      iter = pool_ops.find(tid);
+      if (iter == pool_ops.end())
+        goto done; // op is gone.
       if (osdmap->get_epoch() < m->epoch) {
         ldout(cct, 20) << "waiting for client to reach epoch " << m->epoch << " before calling back" << dendl;
         _wait_for_new_map(op->onfinish, m->epoch, m->replyCode);
+      } else {
+       // map epoch changed, probably because a MOSDMap message
+       // sneaked in. Do caller-specified callback now or else
+       // we lose it forever.
+       assert(op->onfinish);
+       op->onfinish->complete(m->replyCode);   
       }
     }
     else {
+      assert(op->onfinish);
       op->onfinish->complete(m->replyCode);
     }
     op->onfinish = NULL;
@@ -3789,6 +3801,8 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
   } else {
     ldout(cct, 10) << "unknown request " << tid << dendl;
   }
+
+done:
   rwlock.unlock();
 
   ldout(cct, 10) << "done" << dendl;