From d7fd6fccc9e155a8b19fa8aee13d016ad004b6f6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 23 Dec 2014 15:59:19 -0800 Subject: [PATCH] osdc/Objecter: improve pool deletion detection Currently we can have a race like so: - send op in epoch X - osd replies - pool deleted in epoch X+1 - client gets X+1, sends map epoch check - client gets reply -> fails assert that there is no map check in flight Avoid this situation by inferring that the pool is deleted when we see that we previously sent the request but the pool is no longer present. Since pool ids are not reused there is no point in doing a synchronous map check at all. Backport: giant Fixes: #10372 Signed-off-by: Sage Weil --- src/osdc/Objecter.cc | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 339ed8427100f..79068d9bc2a84 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -1271,16 +1271,25 @@ void Objecter::_check_op_pool_dne(Op *op, bool session_locked) { assert(rwlock.is_wlocked()); - ldout(cct, 10) << "check_op_pool_dne tid " << op->tid - << " current " << osdmap->get_epoch() - << " map_dne_bound " << op->map_dne_bound - << dendl; + if (op->attempts) { + // we send a reply earlier, which means that previously the pool + // existed, and now it does not (i.e., it was deleted). + op->map_dne_bound = osdmap->get_epoch(); + ldout(cct, 10) << "check_op_pool_dne tid " << op->tid + << " pool previously exists but now does not" + << dendl; + } else { + ldout(cct, 10) << "check_op_pool_dne tid " << op->tid + << " current " << osdmap->get_epoch() + << " map_dne_bound " << op->map_dne_bound + << dendl; + } if (op->map_dne_bound > 0) { if (osdmap->get_epoch() >= op->map_dne_bound) { // we had a new enough map ldout(cct, 10) << "check_op_pool_dne tid " << op->tid - << " concluding pool " << op->target.base_pgid.pool() << " dne" - << dendl; + << " concluding pool " << op->target.base_pgid.pool() + << " dne" << dendl; if (op->onack) { op->onack->complete(-ENOENT); } @@ -1367,10 +1376,17 @@ void Objecter::_check_linger_pool_dne(LingerOp *op, bool *need_unregister) *need_unregister = false; - ldout(cct, 10) << "_check_linger_pool_dne linger_id " << op->linger_id - << " current " << osdmap->get_epoch() - << " map_dne_bound " << op->map_dne_bound - << dendl; + if (op->register_gen > 0) { + ldout(cct, 10) << "_check_linger_pool_dne linger_id " << op->linger_id + << " pool previously existed but now does not" + << dendl; + op->map_dne_bound = osdmap->get_epoch(); + } else { + ldout(cct, 10) << "_check_linger_pool_dne linger_id " << op->linger_id + << " current " << osdmap->get_epoch() + << " map_dne_bound " << op->map_dne_bound + << dendl; + } if (op->map_dne_bound > 0) { if (osdmap->get_epoch() >= op->map_dne_bound) { if (op->on_reg_ack) { -- 2.39.5