PG: switch the start_recovery_ops interface to specify work to do as a param

author Greg Farnum <greg@inktank.com>

Tue, 22 Oct 2013 00:36:04 +0000 (17:36 -0700)

committer Samuel Just <sam.just@inktank.com>

Sun, 27 Oct 2013 17:40:32 +0000 (10:40 -0700)
author Greg Farnum <greg@inktank.com>
Tue, 22 Oct 2013 00:36:04 +0000 (17:36 -0700)
committer Samuel Just <sam.just@inktank.com>
Sun, 27 Oct 2013 17:40:32 +0000 (10:40 -0700)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

index fabe6da30b869586022ef13ff25ca1dd6ef03e2d..eb5191f770e15ab047d06421d80854cd732aa3ef 100644 (file)
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -6762,7 +6762,9 @@ void OSD::do_recovery(PG *pg, ThreadPool::TPHandle &handle)
  #endif
      
      PG::RecoveryCtx rctx = create_context();
-    int started = pg->start_recovery_ops(max, &rctx, handle);
+
+    int started;
+    bool more = pg->start_recovery_ops(max, &rctx, handle, &started);
      dout(10) << "do_recovery started " << started << "/" << max << " on " << *pg << dendl;
  
      /*
@@ -6771,7 +6773,7 @@ void OSD::do_recovery(PG *pg, ThreadPool::TPHandle &handle)
       * It may be that our initial locations were bad and we errored
       * out while trying to pull.
       */
-    if (!started && pg->have_unfound()) {
+    if (!more && pg->have_unfound()) {
        pg->discover_all_missing(*rctx.query_map);
        if (rctx.query_map->empty()) {
         dout(10) << "do_recovery  no luck, giving up on this pg for now" << dendl;
diff --git a/src/osd/PG.h b/src/osd/PG.h

index dc11638fd4b9e2aef4cf26d6b8a7b4c7d77eb1cf..9f982167c91b86d7831a1b3576a918f6381b8b52 100644 (file)
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -645,9 +645,14 @@ public:
  
    virtual void check_local() = 0;
  
-  virtual int start_recovery_ops(
+  /**
+   * @param ops_begun returns how many recovery ops the function started
+   * @returns true if any useful work was accomplished; false otherwise
+   */
+  virtual bool start_recovery_ops(
      int max, RecoveryCtx *prctx,
-    ThreadPool::TPHandle &handle) = 0;
+    ThreadPool::TPHandle &handle,
+    int *ops_begun) = 0;
  
    void purge_strays();
  
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc

index 5a0bb43642646f3de0c0e904db46b31ca3720ff1..cdbc5a82718a063cd1941f5e09e935dd937ede42 100644 (file)
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -7496,17 +7496,22 @@ void ReplicatedPG::check_recovery_sources(const OSDMapRef osdmap)
  }
    
  
-int ReplicatedPG::start_recovery_ops(
+bool ReplicatedPG::start_recovery_ops(
    int max, RecoveryCtx *prctx,
-  ThreadPool::TPHandle &handle)
+  ThreadPool::TPHandle &handle,
+  int *ops_started)
  {
-  int started = 0;
+  int& started = *ops_started;
+  started = 0;
+  bool work_in_progress = false;
    assert(is_primary());
  
    if (!state_test(PG_STATE_RECOVERING) &&
        !state_test(PG_STATE_BACKFILL)) {
+    /* TODO: I think this case is broken and will make do_recovery()
+     * unhappy since we're returning false */
      dout(10) << "recovery raced and were queued twice, ignoring!" << dendl;
-    return 0;
+    return false;
    }
  
    const pg_missing_t &missing = pg_log.get_missing();
@@ -7532,6 +7537,9 @@ int ReplicatedPG::start_recovery_ops(
      started = recover_replicas(max, handle);
    }
  
+  if (started)
+    work_in_progress = true;
+
    bool deferred_backfill = false;
    if (recovering.empty() &&
        state_test(PG_STATE_BACKFILL) &&
@@ -7555,7 +7563,7 @@ int ReplicatedPG::start_recovery_ops(
        }
        deferred_backfill = true;
      } else {
-      started += recover_backfill(max - started, handle);
+      started += recover_backfill(max - started, handle, &work_in_progress);
      }
    }
  
@@ -7563,8 +7571,8 @@ int ReplicatedPG::start_recovery_ops(
    osd->logger->inc(l_osd_rop, started);
  
    if (!recovering.empty() ||
-      started || recovery_ops_active > 0 || deferred_backfill)
-    return started;
+      work_in_progress || recovery_ops_active > 0 || deferred_backfill)
+    return work_in_progress;
  
    assert(recovering.empty());
    assert(recovery_ops_active == 0);
@@ -7572,21 +7580,21 @@ int ReplicatedPG::start_recovery_ops(
    int unfound = get_num_unfound();
    if (unfound) {
      dout(10) << " still have " << unfound << " unfound" << dendl;
-    return started;
+    return work_in_progress;
    }
  
    if (missing.num_missing() > 0) {
      // this shouldn't happen!
      osd->clog.error() << info.pgid << " recovery ending with " << missing.num_missing()
                       << ": " << missing.missing << "\n";
-    return started;
+    return work_in_progress;
    }
  
    if (needs_recovery()) {
      // this shouldn't happen!
      // We already checked num_missing() so we must have missing replicas
      osd->clog.error() << info.pgid << " recovery ending with missing replicas\n";
-    return started;
+    return work_in_progress;
    }
  
    if (state_test(PG_STATE_RECOVERING)) {
@@ -7937,7 +7945,7 @@ int ReplicatedPG::recover_replicas(int max, ThreadPool::TPHandle &handle)
   */
  int ReplicatedPG::recover_backfill(
    int max,
-  ThreadPool::TPHandle &handle)
+  ThreadPool::TPHandle &handle, bool *work_started)
  {
    dout(10) << "recover_backfill (" << max << ")" << dendl;
    assert(backfill_target >= 0);
@@ -8117,6 +8125,8 @@ int ReplicatedPG::recover_backfill(
  
    dout(10) << " peer num_objects now " << pinfo.stats.stats.sum.num_objects
            << " / " << info.stats.stats.sum.num_objects << dendl;
+  if (ops)
+    *work_started = true;
    return ops;
  }
  
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h

index c7e73eafa521fb689eff53c52ecc19a47293a093..f36138079a0b524b3b39ee4282cdb2941ffb7386 100644 (file)
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -898,13 +898,18 @@ protected:
    void _clear_recovery_state();
  
    void queue_for_recovery();
-  int start_recovery_ops(
+  bool start_recovery_ops(
      int max, RecoveryCtx *prctx,
-    ThreadPool::TPHandle &handle);
+    ThreadPool::TPHandle &handle, int *started);
  
    int recover_primary(int max, ThreadPool::TPHandle &handle);
    int recover_replicas(int max, ThreadPool::TPHandle &handle);
-  int recover_backfill(int max, ThreadPool::TPHandle &handle);
+  /**
+   * @param work_started will be set to true if recover_backfill got anywhere
+   * @returns the number of operations started
+   */
+  int recover_backfill(int max, ThreadPool::TPHandle &handle,
+                       bool *work_started);
  
    /**
     * scan a (hash) range of objects in the current pg
author	Greg Farnum <greg@inktank.com>
	Tue, 22 Oct 2013 00:36:04 +0000 (17:36 -0700)
committer	Samuel Just <sam.just@inktank.com>
	Sun, 27 Oct 2013 17:40:32 +0000 (10:40 -0700)
src/osd/OSD.cc		patch \| blob \| history
src/osd/PG.h		patch \| blob \| history
src/osd/ReplicatedPG.cc		patch \| blob \| history
src/osd/ReplicatedPG.h		patch \| blob \| history