]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PG: make scan recovery op cancellation match up reliably
authorSage Weil <sage@redhat.com>
Wed, 25 Oct 2017 03:32:18 +0000 (22:32 -0500)
committerSage Weil <sage@redhat.com>
Thu, 26 Oct 2017 02:52:12 +0000 (21:52 -0500)
Previously, there was only one time we would end up in this region of code:
when the backfill was rejected by the peer.  Previously that was apparently
reliably when we had an outstanding SCAN request, because we would
unconditionally cancle the MAX recovery op and clear waiting_on_backfill.

See 624aaf2a4ea9950153a89ff921e2adce683a6f51 for when this code appeared.

Now we have several similar paths, and we don't always have an outstanding
scan call (I don't think!).  Regardless, move most these three cases into
a common helper and make the finish_recovery_op completion conditional
on whether there is an outstanding SCAN.  This fixes a leak of a recovery
op when we defer while a scan is outstanding (this bug was recently
introduced by e708410542b0a52fbb29e14b76f49c94adbc0a59 and then
duplicated by 2463c6463d1ed38a2e15a0960ed1530a47851489).

Note that there is still one other time we register MAX ops: when we are
finishing backfill.  There, we start one per target.  But we will always
get back our reply and process it in the normal way (that old commit
did not change the timing for these).

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/PG.cc
src/osd/PG.h

index 06c2767455be4af292fb9cdffb569530498b15b6..7a91811b45ee57dbe48a0516021a8d8e63134564 100644 (file)
@@ -6412,16 +6412,11 @@ PG::RecoveryState::Backfilling::Backfilling(my_context ctx)
   pg->publish_stats_to_osd();
 }
 
-boost::statechart::result
-PG::RecoveryState::Backfilling::react(const DeferBackfill &c)
+void PG::RecoveryState::Backfilling::cancel_backfill()
 {
   PG *pg = context< RecoveryMachine >().pg;
-  ldout(pg->cct, 10) << "defer backfill, retry delay " << c.delay << dendl;
   pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
 
-  pg->state_set(PG_STATE_BACKFILL_WAIT);
-  pg->state_clear(PG_STATE_BACKFILLING);
-
   for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
        it != pg->backfill_targets.end();
        ++it) {
@@ -6438,8 +6433,20 @@ PG::RecoveryState::Backfilling::react(const DeferBackfill &c)
     }
   }
 
-  pg->waiting_on_backfill.clear();
+  if (!pg->waiting_on_backfill.empty()) {
+    pg->waiting_on_backfill.clear();
+    pg->finish_recovery_op(hobject_t::get_max());
+  }
+}
 
+boost::statechart::result
+PG::RecoveryState::Backfilling::react(const DeferBackfill &c)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  ldout(pg->cct, 10) << "defer backfill, retry delay " << c.delay << dendl;
+  pg->state_set(PG_STATE_BACKFILL_WAIT);
+  pg->state_clear(PG_STATE_BACKFILLING);
+  cancel_backfill();
   pg->schedule_backfill_retry(c.delay);
   return transit<NotBackfilling>();
 }
@@ -6449,29 +6456,9 @@ PG::RecoveryState::Backfilling::react(const UnfoundBackfill &c)
 {
   PG *pg = context< RecoveryMachine >().pg;
   ldout(pg->cct, 10) << "backfill has unfound, can't continue" << dendl;
-  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
-
   pg->state_set(PG_STATE_BACKFILL_UNFOUND);
   pg->state_clear(PG_STATE_BACKFILLING);
-
-  for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
-       it != pg->backfill_targets.end();
-       ++it) {
-    assert(*it != pg->pg_whoami);
-    ConnectionRef con = pg->osd->get_con_osd_cluster(
-      it->osd, pg->get_osdmap()->get_epoch());
-    if (con) {
-      pg->osd->send_message_osd_cluster(
-        new MBackfillReserve(
-         MBackfillReserve::RELEASE,
-         spg_t(pg->info.pgid.pgid, it->shard),
-         pg->get_osdmap()->get_epoch()),
-       con.get());
-    }
-  }
-
-  pg->waiting_on_backfill.clear();
-
+  cancel_backfill();
   return transit<NotBackfilling>();
 }
 
@@ -6479,28 +6466,9 @@ boost::statechart::result
 PG::RecoveryState::Backfilling::react(const RemoteReservationRevokedTooFull &)
 {
   PG *pg = context< RecoveryMachine >().pg;
-  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
   pg->state_set(PG_STATE_BACKFILL_TOOFULL);
-
-  for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
-       it != pg->backfill_targets.end();
-       ++it) {
-    assert(*it != pg->pg_whoami);
-    ConnectionRef con = pg->osd->get_con_osd_cluster(
-      it->osd, pg->get_osdmap()->get_epoch());
-    if (con) {
-      pg->osd->send_message_osd_cluster(
-        new MBackfillReserve(
-         MBackfillReserve::RELEASE,
-         spg_t(pg->info.pgid.pgid, it->shard),
-         pg->get_osdmap()->get_epoch()),
-       con.get());
-    }
-  }
-
-  pg->waiting_on_backfill.clear();
-  pg->finish_recovery_op(hobject_t::get_max());
-
+  pg->state_clear(PG_STATE_BACKFILLING);
+  cancel_backfill();
   pg->schedule_backfill_retry(pg->cct->_conf->osd_recovery_retry_interval);
   return transit<NotBackfilling>();
 }
@@ -6509,27 +6477,8 @@ boost::statechart::result
 PG::RecoveryState::Backfilling::react(const RemoteReservationRevoked &)
 {
   PG *pg = context< RecoveryMachine >().pg;
-  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
   pg->state_set(PG_STATE_BACKFILL_WAIT);
-
-  for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
-       it != pg->backfill_targets.end();
-       ++it) {
-    assert(*it != pg->pg_whoami);
-    ConnectionRef con = pg->osd->get_con_osd_cluster(
-      it->osd, pg->get_osdmap()->get_epoch());
-    if (con) {
-      pg->osd->send_message_osd_cluster(
-        new MBackfillReserve(
-         MBackfillReserve::RELEASE,
-         spg_t(pg->info.pgid.pgid, it->shard),
-         pg->get_osdmap()->get_epoch()),
-       con.get());
-    }
-  }
-
-  pg->waiting_on_backfill.clear();
-
+  cancel_backfill();
   return transit<WaitLocalBackfillReserved>();
 }
 
index 2b9571d9276001d4f1cd0908ee0491555b4e88ef..a03deb2e906b9bd62dd9d79660fed9f6b39b9496 100644 (file)
@@ -2193,6 +2193,7 @@ protected:
       boost::statechart::result react(const RemoteReservationRevoked& evt);
       boost::statechart::result react(const DeferBackfill& evt);
       boost::statechart::result react(const UnfoundBackfill& evt);
+      void cancel_backfill();
       void exit();
     };