git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Add new UnfoundBackfill and UnfoundRecovery pg transitions
author David Zafman <dzafman@redhat.com>
Mon, 9 Oct 2017 15:17:29 +0000 (08:17 -0700)
committer David Zafman <dzafman@redhat.com>
Wed, 18 Oct 2017 18:01:39 +0000 (11:01 -0700)
Signed-off-by: David Zafman <dzafman@redhat.com>
qa/standalone/erasure-code/test-erasure-eio.sh
src/osd/PG.cc
src/osd/PG.h

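diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh
index 0cbe6d6443eb41f062b1a7e450ce2fd830c5596a..8404f7e000be6d27dd774ba4d181c1b592371f58 100755 (executable)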
@@ -415,8 +415,8 @@ function TEST_ec_recovery_errors() {
     delete_pool $poolname
 }
 
-# Test backfill with errors present
-function TEST_ec_backfill_errors() {
+# Test backfill with unfound object
+function TEST_ec_backfill_unfound() {
     local dir=$1
     local objname=myobject
     local lastobj=300
@@ -456,13 +456,14 @@ function TEST_ec_backfill_errors() {
 
     sleep 15
 
-    while(true); do
+    for tmp in $(seq 1 100); do
       state=$(get_state 2.0)
-      echo $state | grep -v backfilling
+      echo $state | grep backfill_unfound
       if [ "$?" = "0" ]; then
         break
       fi
-      echo -n "$state "
+      echo $state
+      sleep 1
     done
 
     ceph pg dump pgs
@@ -492,8 +493,8 @@ function TEST_ec_backfill_errors() {
     delete_pool $poolname
 }
 
-# Test recovery with errors present
-function TEST_ec_recovery_errors() {
+# Test recovery with unfound object
+function TEST_ec_recovery_unfound() {
     local dir=$1
     local objname=myobject
     local lastobj=100
@@ -531,13 +532,14 @@ function TEST_ec_recovery_errors() {
 
     sleep 15
 
-    while(true); do
+    for tmp in $(seq 1 100); do
       state=$(get_state 2.0)
-      echo $state | grep -v recovering
+      echo $state | grep recovery_unfound
       if [ "$?" = "0" ]; then
         break
       fi
-      echo -n "$state "
+      echo "$state "
+      sleep 1
     done
 
     ceph pg dump pgs
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index da44df854154762a72fd1c53353721a95cc93dae..831ea5fbc22971ea03c581c49644586aed6e063e 100644 (file)
@@ -5775,7 +5775,7 @@ void PG::find_unfound(epoch_t queued, RecoveryCtx *rctx)
        new PG::CephPeeringEvt(
          queued,
          queued,
-         PG::DeferBackfill(cct->_conf->osd_recovery_retry_interval)));
+         PG::UnfoundBackfill()));
       queue_peering_event(evt);
       action = "in backfill";
     } else if (state_test(PG_STATE_RECOVERING)) {
@@ -5783,7 +5783,7 @@ void PG::find_unfound(epoch_t queued, RecoveryCtx *rctx)
        new PG::CephPeeringEvt(
          queued,
          queued,
-         PG::DeferRecovery(cct->_conf->osd_recovery_retry_interval)));
+         PG::UnfoundRecovery()));
       queue_peering_event(evt);
       action = "in recovery";
     } else {
@@ -6358,6 +6358,36 @@ PG::RecoveryState::Backfilling::react(const DeferBackfill &c)
   return transit<NotBackfilling>();
 }
 
+boost::statechart::result
+PG::RecoveryState::Backfilling::react(const UnfoundBackfill &c)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  ldout(pg->cct, 10) << "backfill has unfound, can't continue" << dendl;
+  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
+
+  pg->state_clear(PG_STATE_BACKFILLING);
+
+  for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
+       it != pg->backfill_targets.end();
+       ++it) {
+    assert(*it != pg->pg_whoami);
+    ConnectionRef con = pg->osd->get_con_osd_cluster(
+      it->osd, pg->get_osdmap()->get_epoch());
+    if (con) {
+      pg->osd->send_message_osd_cluster(
+        new MBackfillReserve(
+         MBackfillReserve::CANCEL,
+         spg_t(pg->info.pgid.pgid, it->shard),
+         pg->get_osdmap()->get_epoch()),
+       con.get());
+    }
+  }
+
+  pg->waiting_on_backfill.clear();
+
+  return transit<NotBackfilling>();
+}
+
 boost::statechart::result
 PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &)
 {
@@ -6537,6 +6567,7 @@ void PG::RecoveryState::NotBackfilling::exit()
 {
   context< RecoveryMachine >().log_exit(state_name, enter_time);
   PG *pg = context< RecoveryMachine >().pg;
+  pg->state_clear(PG_STATE_UNFOUND);
   utime_t dur = ceph_clock_now() - enter_time;
   pg->osd->recoverystate_perf->tinc(rs_notbackfilling_latency, dur);
 }
@@ -6555,6 +6586,7 @@ void PG::RecoveryState::NotRecovering::exit()
 {
   context< RecoveryMachine >().log_exit(state_name, enter_time);
   PG *pg = context< RecoveryMachine >().pg;
+  pg->state_clear(PG_STATE_UNFOUND);
   utime_t dur = ceph_clock_now() - enter_time;
   pg->osd->recoverystate_perf->tinc(rs_notrecovering_latency, dur);
 }
@@ -6929,6 +6961,17 @@ PG::RecoveryState::Recovering::react(const DeferRecovery &evt)
   return transit<NotRecovering>();
 }
 
+boost::statechart::result
+PG::RecoveryState::Recovering::react(const UnfoundRecovery &evt)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  ldout(pg->cct, 10) << "recovery has unfound, can't continue" << dendl;
+  pg->state_clear(PG_STATE_RECOVERING);
+  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
+  release_reservations(true);
+  return transit<NotRecovering>();
+}
+
 void PG::RecoveryState::Recovering::exit()
 {
   context< RecoveryMachine >().log_exit(state_name, enter_time);
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 2692117714c05535a38fafcdf654c3f2496b4854..ce67cc1d9608152a9bee58002cd3bab30ef4eb7a 100644 (file)
@@ -1815,6 +1815,18 @@ public:
       *out << "DeferRecovery: delay " << delay;
     }
   };
+  struct UnfoundBackfill : boost::statechart::event<UnfoundBackfill> {
+    explicit UnfoundBackfill() {}
+    void print(std::ostream *out) const {
+      *out << "UnfoundBackfill";
+    }
+  };
+  struct UnfoundRecovery : boost::statechart::event<UnfoundRecovery> {
+    explicit UnfoundRecovery() {}
+    void print(std::ostream *out) const {
+      *out << "UnfoundRecovery";
+    }
+  };
 protected:
   TrivialEvent(Initialize)
   TrivialEvent(Load)
@@ -2101,7 +2113,9 @@ protected:
        boost::statechart::custom_reaction< Backfilled >,
        boost::statechart::custom_reaction< AllReplicasActivated >,
        boost::statechart::custom_reaction< DeferRecovery >,
-       boost::statechart::custom_reaction< DeferBackfill >
+       boost::statechart::custom_reaction< DeferBackfill >,
+       boost::statechart::custom_reaction< UnfoundRecovery >,
+       boost::statechart::custom_reaction< UnfoundBackfill >
        > reactions;
       boost::statechart::result react(const QueryState& q);
       boost::statechart::result react(const ActMap&);
@@ -2119,6 +2133,12 @@ protected:
       boost::statechart::result react(const DeferBackfill& evt) {
        return discard_event();
       }
+      boost::statechart::result react(const UnfoundRecovery& evt) {
+       return discard_event();
+      }
+      boost::statechart::result react(const UnfoundBackfill& evt) {
+       return discard_event();
+      }
     };
 
     struct Clean : boost::statechart::state< Clean, Active >, NamedState {
@@ -2147,11 +2167,13 @@ protected:
       typedef boost::mpl::list<
        boost::statechart::transition< Backfilled, Recovered >,
        boost::statechart::custom_reaction< DeferBackfill >,
+       boost::statechart::custom_reaction< UnfoundBackfill >,
        boost::statechart::custom_reaction< RemoteReservationRejected >
        > reactions;
       explicit Backfilling(my_context ctx);
       boost::statechart::result react(const RemoteReservationRejected& evt);
       boost::statechart::result react(const DeferBackfill& evt);
+      boost::statechart::result react(const UnfoundBackfill& evt);
       void exit();
     };
 
@@ -2191,13 +2213,18 @@ protected:
     struct NotRecovering : boost::statechart::state< NotRecovering, Active>, NamedState {
       typedef boost::mpl::list<
        boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
-       boost::statechart::custom_reaction< DeferRecovery >
+       boost::statechart::custom_reaction< DeferRecovery >,
+       boost::statechart::custom_reaction< UnfoundRecovery >
        > reactions;
       explicit NotRecovering(my_context ctx);
       boost::statechart::result react(const DeferRecovery& evt) {
        /* no-op */
        return discard_event();
       }
+      boost::statechart::result react(const UnfoundRecovery& evt) {
+       /* no-op */
+       return discard_event();
+      }
       void exit();
     };
 
@@ -2214,7 +2241,9 @@ protected:
        boost::statechart::custom_reaction< MLogRec >,
        boost::statechart::custom_reaction< Activate >,
        boost::statechart::custom_reaction< DeferRecovery >,
-       boost::statechart::custom_reaction< DeferBackfill >
+       boost::statechart::custom_reaction< DeferBackfill >,
+       boost::statechart::custom_reaction< UnfoundRecovery >,
+       boost::statechart::custom_reaction< UnfoundBackfill >
        > reactions;
       boost::statechart::result react(const QueryState& q);
       boost::statechart::result react(const MInfoRec& infoevt);
@@ -2228,6 +2257,12 @@ protected:
       boost::statechart::result react(const DeferBackfill& evt) {
        return discard_event();
       }
+      boost::statechart::result react(const UnfoundRecovery& evt) {
+       return discard_event();
+      }
+      boost::statechart::result react(const UnfoundBackfill& evt) {
+       return discard_event();
+      }
     };
 
     struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState {
@@ -2295,6 +2330,7 @@ protected:
       typedef boost::mpl::list <
        boost::statechart::custom_reaction< AllReplicasRecovered >,
        boost::statechart::custom_reaction< DeferRecovery >,
+       boost::statechart::custom_reaction< UnfoundRecovery >,
        boost::statechart::custom_reaction< RequestBackfill >
        > reactions;
       explicit Recovering(my_context ctx);
@@ -2302,6 +2338,7 @@ protected:
       void release_reservations(bool cancel = false);
       boost::statechart::result react(const AllReplicasRecovered &evt);
       boost::statechart::result react(const DeferRecovery& evt);
+      boost::statechart::result react(const UnfoundRecovery& evt);
       boost::statechart::result react(const RequestBackfill &evt);
     };