]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osdc/Objecter: improve pool deletion detection 3249/head 3250/head
authorSage Weil <sage@redhat.com>
Tue, 23 Dec 2014 23:59:19 +0000 (15:59 -0800)
committerSage Weil <sage@redhat.com>
Tue, 23 Dec 2014 23:59:19 +0000 (15:59 -0800)
Currently we can have a race like so:

 - send op in epoch X
 - osd replies
 - pool deleted in epoch X+1
 - client gets X+1, sends map epoch check
 - client gets reply
   -> fails assert that there is no map check in flight

Avoid this situation by inferring that the pool is deleted when we see
that we previously sent the request but the pool is no longer present.
Since pool ids are not reused there is no point in doing a synchronous
map check at all.

Backport: giant
Fixes: #10372
Signed-off-by: Sage Weil <sage@redhat.com>
src/osdc/Objecter.cc

index 339ed8427100f05d5339dd40a59b90e6f9b82067..79068d9bc2a841b009932a5da5ab9450a6fed2b6 100644 (file)
@@ -1271,16 +1271,25 @@ void Objecter::_check_op_pool_dne(Op *op, bool session_locked)
 {
   assert(rwlock.is_wlocked());
 
-  ldout(cct, 10) << "check_op_pool_dne tid " << op->tid
-                << " current " << osdmap->get_epoch()
-                << " map_dne_bound " << op->map_dne_bound
-                << dendl;
+  if (op->attempts) {
+    // we send a reply earlier, which means that previously the pool
+    // existed, and now it does not (i.e., it was deleted).
+    op->map_dne_bound = osdmap->get_epoch();
+    ldout(cct, 10) << "check_op_pool_dne tid " << op->tid
+                  << " pool previously exists but now does not"
+                  << dendl;
+  } else {
+    ldout(cct, 10) << "check_op_pool_dne tid " << op->tid
+                  << " current " << osdmap->get_epoch()
+                  << " map_dne_bound " << op->map_dne_bound
+                  << dendl;
+  }
   if (op->map_dne_bound > 0) {
     if (osdmap->get_epoch() >= op->map_dne_bound) {
       // we had a new enough map
       ldout(cct, 10) << "check_op_pool_dne tid " << op->tid
-                    << " concluding pool " << op->target.base_pgid.pool() << " dne"
-                    << dendl;
+                    << " concluding pool " << op->target.base_pgid.pool()
+                    << " dne" << dendl;
       if (op->onack) {
        op->onack->complete(-ENOENT);
       }
@@ -1367,10 +1376,17 @@ void Objecter::_check_linger_pool_dne(LingerOp *op, bool *need_unregister)
 
   *need_unregister = false;
 
-  ldout(cct, 10) << "_check_linger_pool_dne linger_id " << op->linger_id
-                << " current " << osdmap->get_epoch()
-                << " map_dne_bound " << op->map_dne_bound
-                << dendl;
+  if (op->register_gen > 0) {
+    ldout(cct, 10) << "_check_linger_pool_dne linger_id " << op->linger_id
+                  << " pool previously existed but now does not"
+                  << dendl;
+    op->map_dne_bound = osdmap->get_epoch();
+  } else {
+    ldout(cct, 10) << "_check_linger_pool_dne linger_id " << op->linger_id
+                  << " current " << osdmap->get_epoch()
+                  << " map_dne_bound " << op->map_dne_bound
+                  << dendl;
+  }
   if (op->map_dne_bound > 0) {
     if (osdmap->get_epoch() >= op->map_dne_bound) {
       if (op->on_reg_ack) {