From: David Zafman Date: Mon, 9 Oct 2017 15:17:29 +0000 (-0700) Subject: osd: Add new UnfoundBackfill and UnfoundRecovery pg transitions X-Git-Tag: v13.0.1~510^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bb2bcb95f51abc206e005e44ef383ee45b8f2209;p=ceph.git osd: Add new UnfoundBackfill and UnfoundRecovery pg transitions Signed-off-by: David Zafman --- diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh index 0cbe6d6443e..8404f7e000b 100755 --- a/qa/standalone/erasure-code/test-erasure-eio.sh +++ b/qa/standalone/erasure-code/test-erasure-eio.sh @@ -415,8 +415,8 @@ function TEST_ec_recovery_errors() { delete_pool $poolname } -# Test backfill with errors present -function TEST_ec_backfill_errors() { +# Test backfill with unfound object +function TEST_ec_backfill_unfound() { local dir=$1 local objname=myobject local lastobj=300 @@ -456,13 +456,14 @@ function TEST_ec_backfill_errors() { sleep 15 - while(true); do + for tmp in $(seq 1 100); do state=$(get_state 2.0) - echo $state | grep -v backfilling + echo $state | grep backfill_unfound if [ "$?" = "0" ]; then break fi - echo -n "$state " + echo $state + sleep 1 done ceph pg dump pgs @@ -492,8 +493,8 @@ function TEST_ec_backfill_errors() { delete_pool $poolname } -# Test recovery with errors present -function TEST_ec_recovery_errors() { +# Test recovery with unfound object +function TEST_ec_recovery_unfound() { local dir=$1 local objname=myobject local lastobj=100 @@ -531,13 +532,14 @@ function TEST_ec_recovery_errors() { sleep 15 - while(true); do + for tmp in $(seq 1 100); do state=$(get_state 2.0) - echo $state | grep -v recovering + echo $state | grep recovery_unfound if [ "$?" 
= "0" ]; then break fi - echo -n "$state " + echo "$state " + sleep 1 done ceph pg dump pgs diff --git a/src/osd/PG.cc b/src/osd/PG.cc index da44df85415..831ea5fbc22 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5775,7 +5775,7 @@ void PG::find_unfound(epoch_t queued, RecoveryCtx *rctx) new PG::CephPeeringEvt( queued, queued, - PG::DeferBackfill(cct->_conf->osd_recovery_retry_interval))); + PG::UnfoundBackfill())); queue_peering_event(evt); action = "in backfill"; } else if (state_test(PG_STATE_RECOVERING)) { @@ -5783,7 +5783,7 @@ void PG::find_unfound(epoch_t queued, RecoveryCtx *rctx) new PG::CephPeeringEvt( queued, queued, - PG::DeferRecovery(cct->_conf->osd_recovery_retry_interval))); + PG::UnfoundRecovery())); queue_peering_event(evt); action = "in recovery"; } else { @@ -6358,6 +6358,36 @@ PG::RecoveryState::Backfilling::react(const DeferBackfill &c) return transit<NotBackfilling>(); } +boost::statechart::result +PG::RecoveryState::Backfilling::react(const UnfoundBackfill &c) +{ + PG *pg = context< RecoveryMachine >().pg; + ldout(pg->cct, 10) << "backfill has unfound, can't continue" << dendl; + pg->osd->local_reserver.cancel_reservation(pg->info.pgid); + + pg->state_clear(PG_STATE_BACKFILLING); + + for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin(); + it != pg->backfill_targets.end(); + ++it) { + assert(*it != pg->pg_whoami); + ConnectionRef con = pg->osd->get_con_osd_cluster( + it->osd, pg->get_osdmap()->get_epoch()); + if (con) { + pg->osd->send_message_osd_cluster( + new MBackfillReserve( + MBackfillReserve::CANCEL, + spg_t(pg->info.pgid.pgid, it->shard), + pg->get_osdmap()->get_epoch()), + con.get()); + } + } + + pg->waiting_on_backfill.clear(); + + return transit<NotBackfilling>(); +} + boost::statechart::result PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &) { @@ -6537,6 +6567,7 @@ void PG::RecoveryState::NotBackfilling::exit() { context< RecoveryMachine >().log_exit(state_name, enter_time); PG *pg = context< RecoveryMachine >().pg; + 
pg->state_clear(PG_STATE_UNFOUND); utime_t dur = ceph_clock_now() - enter_time; pg->osd->recoverystate_perf->tinc(rs_notbackfilling_latency, dur); } @@ -6555,6 +6586,7 @@ void PG::RecoveryState::NotRecovering::exit() { context< RecoveryMachine >().log_exit(state_name, enter_time); PG *pg = context< RecoveryMachine >().pg; + pg->state_clear(PG_STATE_UNFOUND); utime_t dur = ceph_clock_now() - enter_time; pg->osd->recoverystate_perf->tinc(rs_notrecovering_latency, dur); } @@ -6929,6 +6961,17 @@ PG::RecoveryState::Recovering::react(const DeferRecovery &evt) return transit<NotRecovering>(); } +boost::statechart::result +PG::RecoveryState::Recovering::react(const UnfoundRecovery &evt) +{ + PG *pg = context< RecoveryMachine >().pg; + ldout(pg->cct, 10) << "recovery has unfound, can't continue" << dendl; + pg->state_clear(PG_STATE_RECOVERING); + pg->osd->local_reserver.cancel_reservation(pg->info.pgid); + release_reservations(true); + return transit<NotRecovering>(); +} + void PG::RecoveryState::Recovering::exit() { context< RecoveryMachine >().log_exit(state_name, enter_time); diff --git a/src/osd/PG.h b/src/osd/PG.h index 2692117714c..ce67cc1d960 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1815,6 +1815,18 @@ public: *out << "DeferRecovery: delay " << delay; } }; + struct UnfoundBackfill : boost::statechart::event<UnfoundBackfill> { + explicit UnfoundBackfill() {} + void print(std::ostream *out) const { + *out << "UnfoundBackfill"; + } + }; + struct UnfoundRecovery : boost::statechart::event<UnfoundRecovery> { + explicit UnfoundRecovery() {} + void print(std::ostream *out) const { + *out << "UnfoundRecovery"; + } + }; protected: TrivialEvent(Initialize) TrivialEvent(Load) @@ -2101,7 +2113,9 @@ protected: boost::statechart::custom_reaction< Backfilled >, boost::statechart::custom_reaction< AllReplicasActivated >, boost::statechart::custom_reaction< DeferRecovery >, - boost::statechart::custom_reaction< DeferBackfill > + boost::statechart::custom_reaction< DeferBackfill >, + boost::statechart::custom_reaction< UnfoundRecovery >, + 
boost::statechart::custom_reaction< UnfoundBackfill > > reactions; boost::statechart::result react(const QueryState& q); boost::statechart::result react(const ActMap&); @@ -2119,6 +2133,12 @@ protected: boost::statechart::result react(const DeferBackfill& evt) { return discard_event(); } + boost::statechart::result react(const UnfoundRecovery& evt) { + return discard_event(); + } + boost::statechart::result react(const UnfoundBackfill& evt) { + return discard_event(); + } }; struct Clean : boost::statechart::state< Clean, Active >, NamedState { @@ -2147,11 +2167,13 @@ protected: typedef boost::mpl::list< boost::statechart::transition< Backfilled, Recovered >, boost::statechart::custom_reaction< DeferBackfill >, + boost::statechart::custom_reaction< UnfoundBackfill >, boost::statechart::custom_reaction< RemoteReservationRejected > > reactions; explicit Backfilling(my_context ctx); boost::statechart::result react(const RemoteReservationRejected& evt); boost::statechart::result react(const DeferBackfill& evt); + boost::statechart::result react(const UnfoundBackfill& evt); void exit(); }; @@ -2191,13 +2213,18 @@ protected: struct NotRecovering : boost::statechart::state< NotRecovering, Active>, NamedState { typedef boost::mpl::list< boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >, - boost::statechart::custom_reaction< DeferRecovery > + boost::statechart::custom_reaction< DeferRecovery >, + boost::statechart::custom_reaction< UnfoundRecovery > > reactions; explicit NotRecovering(my_context ctx); boost::statechart::result react(const DeferRecovery& evt) { /* no-op */ return discard_event(); } + boost::statechart::result react(const UnfoundRecovery& evt) { + /* no-op */ + return discard_event(); + } void exit(); }; @@ -2214,7 +2241,9 @@ protected: boost::statechart::custom_reaction< MLogRec >, boost::statechart::custom_reaction< Activate >, boost::statechart::custom_reaction< DeferRecovery >, - boost::statechart::custom_reaction< DeferBackfill > + 
boost::statechart::custom_reaction< DeferBackfill >, + boost::statechart::custom_reaction< UnfoundRecovery >, + boost::statechart::custom_reaction< UnfoundBackfill > > reactions; boost::statechart::result react(const QueryState& q); boost::statechart::result react(const MInfoRec& infoevt); @@ -2228,6 +2257,12 @@ protected: boost::statechart::result react(const DeferBackfill& evt) { return discard_event(); } + boost::statechart::result react(const UnfoundRecovery& evt) { + return discard_event(); + } + boost::statechart::result react(const UnfoundBackfill& evt) { + return discard_event(); + } }; struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState { @@ -2295,6 +2330,7 @@ protected: typedef boost::mpl::list < boost::statechart::custom_reaction< AllReplicasRecovered >, boost::statechart::custom_reaction< DeferRecovery >, + boost::statechart::custom_reaction< UnfoundRecovery >, boost::statechart::custom_reaction< RequestBackfill > > reactions; explicit Recovering(my_context ctx); @@ -2302,6 +2338,7 @@ protected: void release_reservations(bool cancel = false); boost::statechart::result react(const AllReplicasRecovered &evt); boost::statechart::result react(const DeferRecovery& evt); + boost::statechart::result react(const UnfoundRecovery& evt); boost::statechart::result react(const RequestBackfill &evt); };