git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Cancel backfill when can't proceed due to errors
author: David Zafman <dzafman@redhat.com>
Mon, 8 May 2017 18:29:55 +0000 (11:29 -0700)
committer: David Zafman <dzafman@redhat.com>
Fri, 23 Jun 2017 15:02:51 +0000 (08:02 -0700)
Add a new transition, CancelBackfill (Backfilling -> NotBackfilling).
When giving up on backfill due to errors, use the new transition,
which also schedules a retry of the backfill.

Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h

index fb77b0777bca21d4cdcfbb6a4b681cf6a96b13d7..bfee81253888501003a360cc932f89519538c2a4 100644 (file)
@@ -9124,9 +9124,16 @@ void OSD::do_recovery(
     if (!more && pg->have_unfound()) {
       pg->discover_all_missing(*rctx.query_map);
       if (rctx.query_map->empty()) {
-       dout(10) << "do_recovery  no luck, giving up on this pg for now" << dendl;
+       dout(10) << __func__ << ": no luck, giving up on this pg for now" << dendl;
+        if (pg->state_test(PG_STATE_BACKFILL)) {
+         auto evt = PG::CephPeeringEvtRef(new PG::CephPeeringEvt(
+           queued,
+           queued,
+           PG::CancelBackfill()));
+         pg->queue_peering_event(evt);
+        }
       } else {
-       dout(10) << "do_recovery  no luck, giving up on this pg for now" << dendl;
+       dout(10) << __func__ << ": no luck, giving up on this pg for now" << dendl;
        pg->queue_recovery();
       }
     }
index 39b373fd478abf9815b23152185c1fdd0ddbfa85..0ea8abadb5354dbcb312d577a24c6be1f27b62bf 100644 (file)
@@ -6273,6 +6273,37 @@ PG::RecoveryState::Backfilling::Backfilling(my_context ctx)
   pg->publish_stats_to_osd();
 }
 
+boost::statechart::result
+PG::RecoveryState::Backfilling::react(const CancelBackfill &)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
+  // XXX: Add a new pg state so user can see why backfill isn't proceeding
+  // Can't use PG_STATE_BACKFILL_WAIT since it means waiting for reservations
+  //pg->state_set(PG_STATE_BACKFILL_STALLED????);
+
+  for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
+       it != pg->backfill_targets.end();
+       ++it) {
+    assert(*it != pg->pg_whoami);
+    ConnectionRef con = pg->osd->get_con_osd_cluster(
+      it->osd, pg->get_osdmap()->get_epoch());
+    if (con) {
+      pg->osd->send_message_osd_cluster(
+        new MBackfillReserve(
+         MBackfillReserve::REJECT,
+         spg_t(pg->info.pgid.pgid, it->shard),
+         pg->get_osdmap()->get_epoch()),
+       con.get());
+    }
+  }
+
+  pg->waiting_on_backfill.clear();
+
+  pg->schedule_backfill_full_retry();
+  return transit<NotBackfilling>();
+}
+
 boost::statechart::result
 PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &)
 {
index 8b3fef6d39654920e62a22d6fb878d90ad8610bb..0923c0570ce0553e06a195de627373c4a0fabf0b 100644 (file)
@@ -1560,6 +1560,7 @@ public:
   TrivialEvent(LocalBackfillReserved)
   TrivialEvent(RemoteBackfillReserved)
   TrivialEvent(RemoteReservationRejected)
+  TrivialEvent(CancelBackfill)
   TrivialEvent(RequestBackfill)
   TrivialEvent(RequestRecovery)
   TrivialEvent(RecoveryDone)
@@ -1871,10 +1872,12 @@ public:
     struct Backfilling : boost::statechart::state< Backfilling, Active >, NamedState {
       typedef boost::mpl::list<
        boost::statechart::transition< Backfilled, Recovered >,
+       boost::statechart::custom_reaction< CancelBackfill >,
        boost::statechart::custom_reaction< RemoteReservationRejected >
        > reactions;
       explicit Backfilling(my_context ctx);
       boost::statechart::result react(const RemoteReservationRejected& evt);
+      boost::statechart::result react(const CancelBackfill& evt);
       void exit();
     };