git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PG: move more recovery logic into PG
author Sage Weil <sage@redhat.com>
Sun, 17 Sep 2017 22:29:16 +0000 (17:29 -0500)
committer Sage Weil <sage@redhat.com>
Fri, 6 Oct 2017 18:08:18 +0000 (13:08 -0500)
I suspect we eventually want to move the create_context and
dispatch_context into OSDService (if it isn't there already) and move
even more of this logic into PG.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h

index 342051a1c6a0498afdbe8dc7c23c863c6d7b6cba..de71ab9855f96f25afac50f01bdd9cbadb97017e 100644 (file)
@@ -8859,55 +8859,19 @@ void OSD::do_recovery(
     dout(20) << "  active was " << service.recovery_oids[pg->pg_id] << dendl;
 #endif
 
-    bool more = pg->start_recovery_ops(reserved_pushes, handle, &started);
+    bool wip = pg->start_recovery_ops(reserved_pushes, handle, &started);
     dout(10) << "do_recovery started " << started << "/" << reserved_pushes 
             << " on " << *pg << dendl;
 
     // If no recovery op is started, don't bother to manipulate the RecoveryCtx
-    if (!started && (more || !pg->have_unfound())) {
+    if (!started && (wip || !pg->have_unfound())) {
       goto out;
     }
 
     PG::RecoveryCtx rctx = create_context();
     rctx.handle = &handle;
-
-    /*
-     * if we couldn't start any recovery ops and things are still
-     * unfound, see if we can discover more missing object locations.
-     * It may be that our initial locations were bad and we errored
-     * out while trying to pull.
-     */
-    if (!more && pg->have_unfound()) {
-      pg->discover_all_missing(*rctx.query_map);
-      if (rctx.query_map->empty()) {
-       string action;
-        if (pg->state_test(PG_STATE_BACKFILLING)) {
-         auto evt = PG::CephPeeringEvtRef(new PG::CephPeeringEvt(
-           queued,
-           queued,
-           PG::DeferBackfill(cct->_conf->osd_recovery_retry_interval)));
-         pg->queue_peering_event(evt);
-         action = "in backfill";
-        } else if (pg->state_test(PG_STATE_RECOVERING)) {
-         auto evt = PG::CephPeeringEvtRef(new PG::CephPeeringEvt(
-           queued,
-           queued,
-           PG::DeferRecovery(cct->_conf->osd_recovery_retry_interval)));
-         pg->queue_peering_event(evt);
-         action = "in recovery";
-       } else {
-         action = "already out of recovery/backfill";
-       }
-       dout(10) << __func__ << ": no luck, giving up on this pg for now (" << action << ")" << dendl;
-      } else {
-       dout(10) << __func__ << ": no luck, giving up on this pg for now (queue_recovery)" << dendl;
-       pg->queue_recovery();
-      }
-    }
-
-    pg->write_if_dirty(*rctx.transaction);
-    OSDMapRef curmap = pg->get_osdmap();
-    dispatch_context(rctx, pg, curmap);
+    pg->stuck_on_unfound(queued, wip, &rctx);
+    dispatch_context(rctx, pg, pg->get_osdmap());
   }
 
  out:
index 36058c439b6412e055158479d65760a6ff43f460..05779f5eaf366dce61e1a28de11e89f5d9f43f71 100644 (file)
@@ -5722,6 +5722,46 @@ void PG::queue_query(epoch_t msg_epoch,
                                         MQuery(from, q, query_epoch))));
 }
 
+void PG::stuck_on_unfound(epoch_t queued, bool wip, RecoveryCtx *rctx)
+{
+  /*
+    * if we couldn't start any recovery ops and things are still
+    * unfound, see if we can discover more missing object locations.
+    * It may be that our initial locations were bad and we errored
+    * out while trying to pull.
+    */
+  if (!wip && have_unfound()) {
+    discover_all_missing(*rctx->query_map);
+    if (rctx->query_map->empty()) {
+      string action;
+      if (state_test(PG_STATE_BACKFILLING)) {
+       auto evt = PG::CephPeeringEvtRef(
+         new PG::CephPeeringEvt(
+           queued,
+           queued,
+           PG::DeferBackfill(cct->_conf->osd_recovery_retry_interval)));
+       queue_peering_event(evt);
+       action = "in backfill";
+      } else if (state_test(PG_STATE_RECOVERING)) {
+       auto evt = PG::CephPeeringEvtRef(
+         new PG::CephPeeringEvt(
+           queued,
+           queued,
+           PG::DeferRecovery(cct->_conf->osd_recovery_retry_interval)));
+       queue_peering_event(evt);
+       action = "in recovery";
+      } else {
+       action = "already out of recovery/backfill";
+      }
+      dout(10) << __func__ << ": no luck, giving up on this pg for now (" << action << ")" << dendl;
+    } else {
+      dout(10) << __func__ << ": no luck, giving up on this pg for now (queue_recovery)" << dendl;
+      queue_recovery();
+    }
+  }
+  write_if_dirty(*rctx->transaction);
+}
+
 void PG::handle_advance_map(
   OSDMapRef osdmap, OSDMapRef lastmap,
   vector<int>& newup, int up_primary,
index 4c64a0e60010531c51c0d1c5de20c4e623a63d2b..fe4571477cabaeed459fa3d3d09939a09dd6b60c 100644 (file)
@@ -410,6 +410,18 @@ public:
 
   void handle_pg_trim(epoch_t epoch, int from, shard_id_t shard, eversion_t trim_to);
 
+  /**
+   * @param ops_begun returns how many recovery ops the function started
+   * @returns true if any useful work was accomplished; false otherwise
+   */
+  virtual bool start_recovery_ops(
+    uint64_t max,
+    ThreadPool::TPHandle &handle,
+    uint64_t *ops_begun) = 0;
+
+  // more work after the above, but with a RecoveryCtx
+  void stuck_on_unfound(epoch_t queued, bool wip, RecoveryCtx *rctx);
+
   virtual void get_watchers(std::list<obj_watch_item_t> *ls) = 0;
 
   void dump_pgstate_history(Formatter *f);
@@ -1342,15 +1354,6 @@ protected:
 
   virtual void check_local() = 0;
 
-  /**
-   * @param ops_begun returns how many recovery ops the function started
-   * @returns true if any useful work was accomplished; false otherwise
-   */
-  virtual bool start_recovery_ops(
-    uint64_t max,
-    ThreadPool::TPHandle &handle,
-    uint64_t *ops_begun) = 0;
-
   void purge_strays();
 
   void update_heartbeat_peers();