git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Cancel recovering when no more progress can be made
author David Zafman <dzafman@redhat.com>
Tue, 9 May 2017 21:35:58 +0000 (14:35 -0700)
committer David Zafman <dzafman@redhat.com>
Fri, 23 Jun 2017 15:02:51 +0000 (08:02 -0700)
Add a new CancelRecovery transition (Recovering -> NotRecovering).
When giving up on recovery due to errors, use the new transition,
which also schedules a retry of recovery.

Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h

index bfee81253888501003a360cc932f89519538c2a4..79c87fa473871049c3ea27139cd5cc674a6a18b0 100644 (file)
@@ -9124,16 +9124,27 @@ void OSD::do_recovery(
     if (!more && pg->have_unfound()) {
       pg->discover_all_missing(*rctx.query_map);
       if (rctx.query_map->empty()) {
-       dout(10) << __func__ << ": no luck, giving up on this pg for now" << dendl;
+       string action;
         if (pg->state_test(PG_STATE_BACKFILL)) {
          auto evt = PG::CephPeeringEvtRef(new PG::CephPeeringEvt(
            queued,
            queued,
            PG::CancelBackfill()));
          pg->queue_peering_event(evt);
-        }
+         action = "in backfill";
+        } else if (pg->state_test(PG_STATE_RECOVERING)) {
+         auto evt = PG::CephPeeringEvtRef(new PG::CephPeeringEvt(
+           queued,
+           queued,
+           PG::CancelRecovery()));
+         pg->queue_peering_event(evt);
+         action = "in recovery";
+       } else {
+         action = "already out of recovery/backfill";
+       }
+       dout(10) << __func__ << ": no luck, giving up on this pg for now (" << action << ")" << dendl;
       } else {
-       dout(10) << __func__ << ": no luck, giving up on this pg for now" << dendl;
+       dout(10) << __func__ << ": no luck, giving up on this pg for now (queue_recovery)" << dendl;
        pg->queue_recovery();
       }
     }
index 0ea8abadb5354dbcb312d577a24c6be1f27b62bf..5fb3833bd118e964da3bce1144125a7430e96236 100644 (file)
@@ -6774,10 +6774,10 @@ PG::RecoveryState::Recovering::Recovering(my_context ctx)
   pg->queue_recovery();
 }
 
-void PG::RecoveryState::Recovering::release_reservations()
+void PG::RecoveryState::Recovering::release_reservations(bool cancel)
 {
   PG *pg = context< RecoveryMachine >().pg;
-  assert(!pg->pg_log.get_missing().have_missing());
+  assert(cancel || !pg->pg_log.get_missing().have_missing());
 
   // release remote reservations
   for (set<pg_shard_t>::const_iterator i =
@@ -6817,6 +6817,17 @@ PG::RecoveryState::Recovering::react(const RequestBackfill &evt)
   return transit<WaitRemoteBackfillReserved>();
 }
 
+boost::statechart::result
+PG::RecoveryState::Recovering::react(const CancelRecovery &evt)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->state_clear(PG_STATE_RECOVERING);
+  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
+  release_reservations(true);
+  pg->schedule_recovery_full_retry();
+  return transit<NotRecovering>();
+}
+
 void PG::RecoveryState::Recovering::exit()
 {
   context< RecoveryMachine >().log_exit(state_name, enter_time);
index 0923c0570ce0553e06a195de627373c4a0fabf0b..ef39da8edc0bce82e626c1c1793bdaaa341f20e5 100644 (file)
@@ -1566,6 +1566,7 @@ public:
   TrivialEvent(RecoveryDone)
   TrivialEvent(BackfillTooFull)
   TrivialEvent(RecoveryTooFull)
+  TrivialEvent(CancelRecovery)
 
   TrivialEvent(AllReplicasRecovered)
   TrivialEvent(DoRecovery)
@@ -1988,12 +1989,14 @@ public:
     struct Recovering : boost::statechart::state< Recovering, Active >, NamedState {
       typedef boost::mpl::list <
        boost::statechart::custom_reaction< AllReplicasRecovered >,
+       boost::statechart::custom_reaction< CancelRecovery >,
        boost::statechart::custom_reaction< RequestBackfill >
        > reactions;
       explicit Recovering(my_context ctx);
       void exit();
-      void release_reservations();
+      void release_reservations(bool cancel = false);
       boost::statechart::result react(const AllReplicasRecovered &evt);
+      boost::statechart::result react(const CancelRecovery& evt);
       boost::statechart::result react(const RequestBackfill &evt);
     };