]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: Check whether any OSD is full before starting recovery
authorDavid Zafman <dzafman@redhat.com>
Wed, 5 Apr 2017 21:12:43 +0000 (14:12 -0700)
committerDavid Zafman <dzafman@redhat.com>
Mon, 17 Apr 2017 15:00:24 +0000 (08:00 -0700)
Add event RecoveryTooFull to move to NotRecovering state

Signed-off-by: David Zafman <dzafman@redhat.com>
src/common/config_opts.h
src/osd/PG.cc
src/osd/PG.h

index c1630a44c010bf0ae4689d2da2c02e953040d321..c742ae6fa205388827af3438fd2900ba66773cd4 100644 (file)
@@ -871,6 +871,7 @@ OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false)
 OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0)
 OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false)  // inject failure during copyfrom completion
 OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
+OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false)
 OPTION(osd_enxio_on_misdirected_op, OPT_BOOL, false)
 OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
 OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
index 576ec836dc552a40bd61ebb215c0f29bd7fe6e70..c5a35dbaa4a4410f6898cac17a086bf87987b316 100644 (file)
@@ -6700,6 +6700,15 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte
 {
   context< RecoveryMachine >().log_enter(state_name);
   PG *pg = context< RecoveryMachine >().pg;
+
+  // Make sure all nodes that part of the recovery aren't full
+  if (!pg->cct->_conf->osd_debug_skip_full_check_in_recovery &&
+      pg->osd->check_osdmap_full(pg->actingbackfill)) {
+    post_event(RecoveryTooFull());
+    return;
+  }
+
+  pg->state_clear(PG_STATE_RECOVERY_TOOFULL);
   pg->state_set(PG_STATE_RECOVERY_WAIT);
   pg->osd->local_reserver.request_reservation(
     pg->info.pgid,
@@ -6710,6 +6719,15 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte
   pg->publish_stats_to_osd();
 }
 
+boost::statechart::result
+PG::RecoveryState::WaitLocalRecoveryReserved::react(const RecoveryTooFull &evt)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->state_set(PG_STATE_RECOVERY_TOOFULL);
+  pg->schedule_recovery_full_retry();
+  return transit<NotRecovering>();
+}
+
 void PG::RecoveryState::WaitLocalRecoveryReserved::exit()
 {
   context< RecoveryMachine >().log_exit(state_name, enter_time);
index 6ac48fd6cbfafd91f9f887171092216d4e638dc6..b2168b3ee6417c671c740ee13b01e2e0c266c9eb 100644 (file)
@@ -1506,6 +1506,7 @@ public:
   TrivialEvent(RequestRecovery)
   TrivialEvent(RecoveryDone)
   TrivialEvent(BackfillTooFull)
+  TrivialEvent(RecoveryTooFull)
 
   TrivialEvent(AllReplicasRecovered)
   TrivialEvent(DoRecovery)
@@ -1947,10 +1948,12 @@ public:
 
     struct WaitLocalRecoveryReserved : boost::statechart::state< WaitLocalRecoveryReserved, Active >, NamedState {
       typedef boost::mpl::list <
-       boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >
+       boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >,
+       boost::statechart::custom_reaction< RecoveryTooFull >
        > reactions;
       explicit WaitLocalRecoveryReserved(my_context ctx);
       void exit();
+      boost::statechart::result react(const RecoveryTooFull &evt);
     };
 
     struct Activating : boost::statechart::state< Activating, Active >, NamedState {