osd: Add PG state and flag for too full for recovery
author David Zafman <dzafman@redhat.com>
Wed, 5 Apr 2017 21:09:18 +0000 (14:09 -0700)
committer David Zafman <dzafman@redhat.com>
Mon, 17 Apr 2017 15:00:24 +0000 (08:00 -0700)
New state machine state NotRecovering
New PG state PG_STATE_RECOVERY_TOOFULL

Signed-off-by: David Zafman <dzafman@redhat.com>
doc/dev/osd_internals/recovery_reservation.rst
src/common/config_opts.h
src/mon/PGMonitor.cc
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc
src/osd/PG.h
src/osd/osd_types.cc
src/osd/osd_types.h

doc/dev/osd_internals/recovery_reservation.rst
index cabea04cc73bbf7e55ff3c95cca4bb6242ce5a1b..4ab03192fe5548466405a250a7b9b5ef1e52252f 100644
@@ -62,6 +62,7 @@ to the monitor. The state chart can set:
 
  - recovery_wait: waiting for local/remote reservations
  - recovering: recovering
+ - recovery_toofull: recovery stopped, OSD(s) above full ratio
  - backfill_wait: waiting for remote backfill reservations
  - backfilling: backfilling
  - backfill_toofull: backfill stopped, OSD(s) above backfillfull ratio
src/common/config_opts.h
index 9eed4d485e21ccfc5536b80cda66d44524025044..c1630a44c010bf0ae4689d2da2c02e953040d321 100644
@@ -630,6 +630,9 @@ OPTION(osd_min_recovery_priority, OPT_INT, 0)
 // Seconds to wait before retrying refused backfills
 OPTION(osd_backfill_retry_interval, OPT_DOUBLE, 30.0)
 
+// Seconds to wait before retrying refused recovery
+OPTION(osd_recovery_retry_interval, OPT_DOUBLE, 30.0)
+
 // max agent flush ops
 OPTION(osd_agent_max_ops, OPT_INT, 4)
 OPTION(osd_agent_max_low_ops, OPT_INT, 2)
src/mon/PGMonitor.cc
index 6669ffcd4b35838eaee8754fd30c38f73f2a65b3..477dabce4e0dc41f7cf530e97532659b0bc7612e 100644
@@ -1316,6 +1316,8 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
       note["backfilling"] += p->second;
     if (p->first & PG_STATE_BACKFILL_TOOFULL)
       note["backfill_toofull"] += p->second;
+    if (p->first & PG_STATE_RECOVERY_TOOFULL)
+      note["recovery_toofull"] += p->second;
   }
 
   ceph::unordered_map<pg_t, pg_stat_t> stuck_pgs;
@@ -1403,6 +1405,7 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
                                PG_STATE_REPAIR |
                                PG_STATE_RECOVERING |
                                PG_STATE_RECOVERY_WAIT |
+                               PG_STATE_RECOVERY_TOOFULL |
                                PG_STATE_INCOMPLETE |
                                PG_STATE_BACKFILL_WAIT |
                                PG_STATE_BACKFILL |
src/osd/OSD.cc
index 2b2aa97beb0b10bc01030ff75e7d20b81247eaee..e39330e092379e574c75872df45f827aee6443de 100644
@@ -2910,6 +2910,7 @@ void OSD::create_recoverystate_perf()
   rs_perf.add_time_avg(rs_down_latency, "down_latency", "Down recovery state latency");
   rs_perf.add_time_avg(rs_getmissing_latency, "getmissing_latency", "Getmissing recovery state latency");
   rs_perf.add_time_avg(rs_waitupthru_latency, "waitupthru_latency", "Waitupthru recovery state latency");
+  rs_perf.add_time_avg(rs_notrecovering_latency, "notrecovering_latency", "Notrecovering recovery state latency");
 
   recoverystate_perf = rs_perf.create_perf_counters();
   cct->get_perfcounters_collection()->add(recoverystate_perf);
src/osd/OSD.h
index 9429640a9b51f01da5fc827fc90e7fb7f41ec69a..f6afebd4df08bcb97f751105bc80e7128a7b0bb7 100644
@@ -202,6 +202,7 @@ enum {
   rs_down_latency,
   rs_getmissing_latency,
   rs_waitupthru_latency,
+  rs_notrecovering_latency,
   rs_last,
 };
 
src/osd/PG.cc
index e7e21c6130209a43f6be4360c8dea4b6d7d5315a..576ec836dc552a40bd61ebb215c0f29bd7fe6e70 100644
@@ -3817,6 +3817,16 @@ void PG::schedule_backfill_full_retry()
       RequestBackfill()));
 }
 
+void PG::schedule_recovery_full_retry()
+{
+  Mutex::Locker lock(osd->recovery_request_lock);
+  osd->recovery_request_timer.add_event_after(
+    cct->_conf->osd_recovery_retry_interval,
+    new QueuePeeringEvt<DoRecovery>(
+      this, get_osdmap()->get_epoch(),
+      DoRecovery()));
+}
+
 void PG::clear_scrub_reserved()
 {
   scrubber.reserved_peers.clear();
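
The new PG::schedule_recovery_full_retry() above mirrors schedule_backfill_full_retry(): while holding recovery_request_lock it arms a one-shot timer that re-delivers a DoRecovery peering event after osd_recovery_retry_interval seconds. Below is a minimal standalone sketch of that deferred-retry pattern in plain C++ (not Ceph's timer or peering-event machinery; names are stand-ins for illustration only):

    // Toy "retry later" helper: under a lock, arm a one-shot timer that
    // invokes a callback after a configurable interval.
    #include <chrono>
    #include <functional>
    #include <iostream>
    #include <mutex>
    #include <thread>

    struct RetryTimer {
      std::mutex lock;  // stands in for osd->recovery_request_lock

      // 'interval' stands in for osd_recovery_retry_interval (seconds).
      void add_event_after(double interval, std::function<void()> event) {
        std::lock_guard<std::mutex> l(lock);
        std::thread([interval, event] {
          std::this_thread::sleep_for(std::chrono::duration<double>(interval));
          event();  // in the patch this would queue a DoRecovery peering event
        }).detach();
      }
    };

    int main() {
      RetryTimer timer;
      timer.add_event_after(1.0, [] { std::cout << "retrying recovery reservation\n"; });
      std::this_thread::sleep_for(std::chrono::seconds(2));  // let the retry fire
    }
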
@@ -5237,6 +5247,7 @@ void PG::start_peering_interval(
   state_clear(PG_STATE_PEERED);
   state_clear(PG_STATE_DOWN);
   state_clear(PG_STATE_RECOVERY_WAIT);
+  state_clear(PG_STATE_RECOVERY_TOOFULL);
   state_clear(PG_STATE_RECOVERING);
 
   peer_purged.clear();
@@ -6488,6 +6499,24 @@ void PG::RecoveryState::NotBackfilling::exit()
   pg->osd->recoverystate_perf->tinc(rs_notbackfilling_latency, dur);
 }
 
+/*----NotRecovering------*/
+PG::RecoveryState::NotRecovering::NotRecovering(my_context ctx)
+  : my_base(ctx),
+    NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/NotRecovering")
+{
+  context< RecoveryMachine >().log_enter(state_name);
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->publish_stats_to_osd();
+}
+
+void PG::RecoveryState::NotRecovering::exit()
+{
+  context< RecoveryMachine >().log_exit(state_name, enter_time);
+  PG *pg = context< RecoveryMachine >().pg;
+  utime_t dur = ceph_clock_now() - enter_time;
+  pg->osd->recoverystate_perf->tinc(rs_notrecovering_latency, dur);
+}
+
 /*---RepNotRecovering----*/
 PG::RecoveryState::RepNotRecovering::RepNotRecovering(my_context ctx)
   : my_base(ctx),
@@ -6737,6 +6766,7 @@ PG::RecoveryState::Recovering::Recovering(my_context ctx)
 
   PG *pg = context< RecoveryMachine >().pg;
   pg->state_clear(PG_STATE_RECOVERY_WAIT);
+  pg->state_clear(PG_STATE_RECOVERY_TOOFULL);
   pg->state_set(PG_STATE_RECOVERING);
   pg->publish_stats_to_osd();
   pg->queue_recovery();
@@ -7185,6 +7215,7 @@ void PG::RecoveryState::Active::exit()
   pg->state_clear(PG_STATE_BACKFILL_TOOFULL);
   pg->state_clear(PG_STATE_BACKFILL_WAIT);
   pg->state_clear(PG_STATE_RECOVERY_WAIT);
+  pg->state_clear(PG_STATE_RECOVERY_TOOFULL);
   utime_t dur = ceph_clock_now() - enter_time;
   pg->osd->recoverystate_perf->tinc(rs_active_latency, dur);
   pg->agent_stop();
src/osd/PG.h
index 4763859d66b9adb3868fef21966fa16761b9dcd8..6ac48fd6cbfafd91f9f887171092216d4e638dc6 100644
@@ -1340,6 +1340,7 @@ public:
 
   void reject_reservation();
   void schedule_backfill_full_retry();
+  void schedule_recovery_full_retry();
 
   // -- recovery state --
 
@@ -1850,6 +1851,14 @@ public:
       boost::statechart::result react(const RemoteReservationRejected& evt);
     };
 
+    struct NotRecovering : boost::statechart::state< NotRecovering, Active>, NamedState {
+      typedef boost::mpl::list<
+       boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >
+       > reactions;
+      explicit NotRecovering(my_context ctx);
+      void exit();
+    };
+
     struct RepNotRecovering;
     struct ReplicaActive : boost::statechart::state< ReplicaActive, Started, RepNotRecovering >, NamedState {
       explicit ReplicaActive(my_context ctx);
src/osd/osd_types.cc
index 2cce17e029fc19f6858e95bb9a489973e5248782..145a0d76831a4b59f525ad52fea07dfb714e7117 100644
@@ -789,6 +789,8 @@ std::string pg_state_string(int state)
     oss << "clean+";
   if (state & PG_STATE_RECOVERY_WAIT)
     oss << "recovery_wait+";
+  if (state & PG_STATE_RECOVERY_TOOFULL)
+    oss << "recovery_toofull+";
   if (state & PG_STATE_RECOVERING)
     oss << "recovering+";
   if (state & PG_STATE_DOWN)
@@ -869,6 +871,8 @@ int pg_string_state(const std::string& state)
     type = PG_STATE_BACKFILL_TOOFULL;
   else if (state == "recovery_wait")
     type = PG_STATE_RECOVERY_WAIT;
+  else if (state == "recovery_toofull")
+    type = PG_STATE_RECOVERY_TOOFULL;
   else if (state == "undersized")
     type = PG_STATE_UNDERSIZED;
   else if (state == "activating")
src/osd/osd_types.h
index 0d2297582f56674e98b76f4f7966fc0b564acfcb..1c4e4c65a6cebbd8adc2a933a081b6492438288c 100644
@@ -971,6 +971,7 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) {
 #define PG_STATE_PEERED        (1<<25) // peered, cannot go active, can recover
 #define PG_STATE_SNAPTRIM      (1<<26) // trimming snaps
 #define PG_STATE_SNAPTRIM_WAIT (1<<27) // queued to trim snaps
+#define PG_STATE_RECOVERY_TOOFULL (1<<28) // recovery can't proceed: too full
 
 std::string pg_state_string(int state);
 std::string pg_vector_string(const vector<int32_t> &a);
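
For illustration, a standalone sketch (not Ceph code; the bit values and string helper are simplified stand-ins) of the bitmask bookkeeping this commit extends: the new PG_STATE_RECOVERY_TOOFULL bit is rendered by pg_state_string() and cleared alongside PG_STATE_RECOVERY_WAIT when the Recovering state is entered:

    #include <iostream>
    #include <sstream>
    #include <string>

    #define PG_STATE_RECOVERY_WAIT    (1 << 0)   // illustrative bit values only
    #define PG_STATE_RECOVERING       (1 << 1)
    #define PG_STATE_RECOVERY_TOOFULL (1 << 2)

    // Simplified analogue of pg_state_string(): render each set bit, '+'-joined.
    std::string pg_state_string(int state) {
      std::ostringstream oss;
      if (state & PG_STATE_RECOVERY_WAIT)    oss << "recovery_wait+";
      if (state & PG_STATE_RECOVERY_TOOFULL) oss << "recovery_toofull+";
      if (state & PG_STATE_RECOVERING)       oss << "recovering+";
      std::string s = oss.str();
      if (!s.empty()) s.pop_back();  // drop the trailing '+'
      return s;
    }

    int main() {
      int state = PG_STATE_RECOVERY_TOOFULL;        // recovery parked: OSD(s) too full
      std::cout << pg_state_string(state) << "\n";  // "recovery_toofull"

      // What Recovering::Recovering() does in the patch: clear the wait/toofull
      // flags and mark the PG as actively recovering.
      state &= ~(PG_STATE_RECOVERY_WAIT | PG_STATE_RECOVERY_TOOFULL);
      state |= PG_STATE_RECOVERING;
      std::cout << pg_state_string(state) << "\n";  // "recovering"
    }
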