From: xiexingguo <258156334@qq.com>
Date: Thu, 29 Oct 2015 12:04:11 +0000 (+0800)
Subject: Objecter: pool_op callback may hang forever.
X-Git-Tag: v0.94.6~61^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F6588%2Fhead;p=ceph.git

Objecter: pool_op callback may hang forever.

pool_op callback may hang forever due to osdmap update during reply handling.
Fixes: #13642
Signed-off-by: xie xingguo

(cherry picked from commit 00c6fa9e31975a935ed2bb33a099e2b4f02ad7f2)
---

diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 9f35ffdda753c..faddcd4fc1db3 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -3626,12 +3626,24 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
     if (osdmap->get_epoch() < m->epoch) {
       rwlock.unlock();
       rwlock.get_write();
+      // recheck op existence since we have let go of rwlock
+      // (for promotion) above.
+      iter = pool_ops.find(tid);
+      if (iter == pool_ops.end())
+        goto done; // op is gone.
       if (osdmap->get_epoch() < m->epoch) {
         ldout(cct, 20) << "waiting for client to reach epoch " << m->epoch
                        << " before calling back" << dendl;
         _wait_for_new_map(op->onfinish, m->epoch, m->replyCode);
+      } else {
+        // map epoch changed, probably because a MOSDMap message
+        // sneaked in. Do caller-specified callback now or else
+        // we lose it forever.
+        assert(op->onfinish);
+        op->onfinish->complete(m->replyCode);
       }
     } else {
+      assert(op->onfinish);
       op->onfinish->complete(m->replyCode);
     }
     op->onfinish = NULL;
@@ -3646,6 +3658,8 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m)
   } else {
     ldout(cct, 10) << "unknown request " << tid << dendl;
   }
+
+done:
   rwlock.unlock();
 
   ldout(cct, 10) << "done" << dendl;