osd, pg: implement force_recovery

author Piotr Dałek <piotr.dalek@corp.ovh.com>

Fri, 24 Feb 2017 07:45:30 +0000 (08:45 +0100)

committer Piotr Dałek <piotr.dalek@corp.ovh.com>

Thu, 20 Jul 2017 07:35:54 +0000 (09:35 +0200)
author Piotr Dałek <piotr.dalek@corp.ovh.com>
Fri, 24 Feb 2017 07:45:30 +0000 (08:45 +0100)
committer Piotr Dałek <piotr.dalek@corp.ovh.com>
Thu, 20 Jul 2017 07:35:54 +0000 (09:35 +0200)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

index 7a494f8e18d78e32e33decc3c21b113a4ac5bb6c..279e6e6a3db581cd7b96b2d2aad69f0d2965cf0f 100644 (file)
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -88,6 +88,7 @@
  #include "messages/MOSDPGBackfill.h"
  #include "messages/MBackfillReserve.h"
  #include "messages/MRecoveryReserve.h"
+#include "messages/MOSDForceRecovery.h"
  #include "messages/MOSDECSubOpWrite.h"
  #include "messages/MOSDECSubOpWriteReply.h"
  #include "messages/MOSDECSubOpRead.h"
@@ -7063,6 +7064,10 @@ void OSD::_dispatch(Message *m)
      handle_scrub(static_cast<MOSDScrub*>(m));
      break;
  
+  case MSG_OSD_FORCE_RECOVERY:
+    handle_force_recovery(m);
+    break;
+
      // -- need OSDMap --
  
    case MSG_OSD_PG_CREATE:
@@ -8870,6 +8875,31 @@ void OSD::handle_pg_recovery_reserve(OpRequestRef op)
    pg->unlock();
  }
  
+void OSD::handle_force_recovery(Message *m)
+{
+  MOSDForceRecovery *msg = static_cast<MOSDForceRecovery*>(m);
+  assert(msg->get_type() == MSG_OSD_FORCE_RECOVERY);
+  RWLock::RLocker l(pg_map_lock);
+
+  vector<PG*> local_pgs;
+  local_pgs.reserve(msg->forced_pgs.size());
+
+  for (auto& i : msg->forced_pgs) {
+    spg_t locpg;
+    if (osdmap->get_primary_shard(i, &locpg)) {
+      auto pg_map_entry = pg_map.find(locpg);
+      if (pg_map_entry != pg_map.end()) {
+       local_pgs.push_back(pg_map_entry->second);
+      }
+    }
+  }
+
+  if (local_pgs.size()) {
+    service.adjust_pg_priorities(local_pgs, msg->options);
+  }
+
+  msg->put();
+}
  
  /** PGQuery
   * from primary to replica | stray
@@ -9124,6 +9154,50 @@ bool OSDService::_recover_now(uint64_t *available_pushes)
    return true;
  }
  
+
+void OSDService::adjust_pg_priorities(vector<PG*> pgs, int newflags)
+{
+  if (!pgs.size() || !(newflags & (OFR_BACKFILL | OFR_RECOVERY)))
+    return;
+  int newstate = 0;
+
+  Mutex::Locker l(recovery_lock);
+
+  if (newflags & OFR_BACKFILL) {
+    newstate = PG_STATE_FORCED_BACKFILL;
+  } else if (newflags & OFR_RECOVERY) {
+    newstate = PG_STATE_FORCED_RECOVERY;
+  }
+
+  // debug output here may get large, don't generate it if debug level is below
+  // 10 and use abbreviated pg ids otherwise
+  if ((cct)->_conf->subsys.should_gather(ceph_subsys_osd, 10)) {
+    stringstream ss;
+
+    for (auto& i : pgs) {
+      ss << i->get_pgid() << " ";
+    }
+
+    dout(10) << __func__ << " working on " << ss.str() << dendl;
+  }
+
+  if (newflags & OFR_CANCEL) {
+    for (auto& i : pgs) {
+      i->change_recovery_force_mode(newstate, true);
+    }
+  } else {
+    for (auto& i : pgs) {
+      // make sure the PG is in correct state before forcing backfill or recovery, or
+      // else we'll make PG keeping FORCE_* flag forever, requiring osds restart
+      // or forcing somehow recovery/backfill.
+      int pgstate = i->get_state();
+      if ( ((newstate == PG_STATE_FORCED_RECOVERY) && (pgstate & (PG_STATE_DEGRADED | PG_STATE_RECOVERY_WAIT | PG_STATE_RECOVERING))) ||
+           ((newstate == PG_STATE_FORCED_BACKFILL) && (pgstate & (PG_STATE_DEGRADED | PG_STATE_BACKFILL_WAIT | PG_STATE_BACKFILL))) )
+        i->change_recovery_force_mode(newstate, false);
+    }
+  }
+}
+
  void OSD::do_recovery(
    PG *pg, epoch_t queued, uint64_t reserved_pushes,
    ThreadPool::TPHandle &handle)
diff --git a/src/osd/OSD.h b/src/osd/OSD.h

index 6da927e1affd73443b91b253b77f84a1e0a0db0d..38942c44aac47dd57097c63504a79e011c2f375f 100644 (file)
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -962,6 +962,7 @@ public:
      _queue_for_recovery(make_pair(queued, pg), reserved_pushes);
    }
  
+  void adjust_pg_priorities(vector<PG*> pgs, int newflags);
  
    // osd map cache (past osd maps)
    Mutex map_cache_lock;
@@ -2170,6 +2171,8 @@ protected:
    void handle_pg_backfill_reserve(OpRequestRef op);
    void handle_pg_recovery_reserve(OpRequestRef op);
  
+  void handle_force_recovery(Message *m);
+
    void handle_pg_remove(OpRequestRef op);
    void _remove_pg(PG *pg);
  
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index 49eb61498b0444ee3d4ac03357b3b4c1286a8920..eb8dab61cd034ccda2001c05ebf37b6a02ac7b58 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -2037,6 +2037,19 @@ void PG::mark_clean()
    kick_snap_trim();
  }
  
+void PG::change_recovery_force_mode(int new_mode, bool clear)
+{
+  lock(true);
+  if (clear) {
+    state_clear(new_mode);
+  } else {
+    state_set(new_mode);
+  }
+  publish_stats_to_osd();
+
+  unlock();
+}
+
  inline int PG::clamp_recovery_priority(int priority)
  {
    static_assert(OSD_RECOVERY_PRIORITY_MIN < OSD_RECOVERY_PRIORITY_MAX, "Invalid priority range");
@@ -2063,7 +2076,7 @@ unsigned PG::get_recovery_priority()
      pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &ret);
      ret = clamp_recovery_priority(OSD_RECOVERY_PRIORITY_BASE + ret);
    }
-
+  dout(20) << __func__ << " recovery priority for " << *this << " is " << ret << ", state is " << state << dendl;
    return static_cast<unsigned>(ret);
  }
  
diff --git a/src/osd/PG.h b/src/osd/PG.h

index d986f3f473c58ea87a1e0640c4de610022d19eef..924c930dd83d995ad9eae067588695bf9a7779e2 100644 (file)
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -982,6 +982,7 @@ public:
    unsigned get_backfill_priority();
  
    void mark_clean();  ///< mark an active pg clean
+  void change_recovery_force_mode(int new_mode, bool clear);
  
    /// return [start,end) bounds for required past_intervals
    static pair<epoch_t, epoch_t> get_required_past_interval_bounds(
author	Piotr Dałek <piotr.dalek@corp.ovh.com>
	Fri, 24 Feb 2017 07:45:30 +0000 (08:45 +0100)
committer	Piotr Dałek <piotr.dalek@corp.ovh.com>
	Thu, 20 Jul 2017 07:35:54 +0000 (09:35 +0200)
src/osd/OSD.cc		patch \| blob \| history
src/osd/OSD.h		patch \| blob \| history
src/osd/PG.cc		patch \| blob \| history
src/osd/PG.h		patch \| blob \| history