From: Sage Weil Date: Thu, 17 Nov 2016 20:05:26 +0000 (-0600) Subject: osdc/Objecter: handle race between calc_target and handle_osd_map X-Git-Tag: v11.1.0~184^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a863ae1c0fab636eabced0979889cbb3be74bf74;p=ceph.git osdc/Objecter: handle race between calc_target and handle_osd_map If we fail to get an existing session and have to take the exclusive lock, we may race with an OSDMap update and end up with a stale target. Check for an epoch change and, if it happens, recalculate the mapping. Fixes: http://tracker.ceph.com/issues/17942 Reported-by: wangdongxu Reported-by: menglingkun Signed-off-by: Sage Weil --- diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 963c8108278..81a92a30116 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -2296,27 +2296,29 @@ void Objecter::_op_submit(Op *op, shunique_lock& sul, ceph_tid_t *ptid) assert(op->session == NULL); OSDSession *s = NULL; - bool const check_for_latest_map = _calc_target(&op->target, - &op->last_force_resend) + bool check_for_latest_map = _calc_target(&op->target, &op->last_force_resend) == RECALC_OP_TARGET_POOL_DNE; // Try to get a session, including a retry if we need to take write lock int r = _get_session(op->target.osd, &s, sul); - if (r == -EAGAIN) { - assert(s == NULL); + if (r == -EAGAIN || + (check_for_latest_map && sul.owns_lock_shared())) { + epoch_t orig_epoch = osdmap->get_epoch(); sul.unlock(); sul.lock(); + if (orig_epoch != osdmap->get_epoch()) { + // map changed; recalculate mapping + check_for_latest_map = _calc_target(&op->target, &op->last_force_resend) + == RECALC_OP_TARGET_POOL_DNE; + } + } + if (r == -EAGAIN) { + assert(s == NULL); r = _get_session(op->target.osd, &s, sul); } assert(r == 0); assert(s); // may be homeless - // We may need to take wlock if we will need to _set_op_map_check later. - if (check_for_latest_map && sul.owns_lock_shared()) { - sul.unlock(); - sul.lock(); - } - _send_op_account(op); // send?