]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: scrub: wait for digest updates to apply before next scrub chunk 3134/head
author Sage Weil <sage@redhat.com>
Wed, 17 Dec 2014 00:39:35 +0000 (16:39 -0800)
committer Sage Weil <sage@redhat.com>
Sat, 20 Dec 2014 15:30:06 +0000 (07:30 -0800)
Wait for any digest updates to apply before we scrub the next chunk.  This
bounds the number of repops we initiate by the size of the scrub chunk, and
is generally nicer to the cluster.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h

index fd68039ad37c35a201eee9cf8dad8c85fb324b92..6a42e5f490585196d7f370c5670c0cd3557c2eeb 100644 (file)
@@ -3743,8 +3743,9 @@ void PG::scrub(ThreadPool::TPHandle &handle)
  *  (4) Wait for writes to flush on the chunk
  *  (5) Wait for maps from replicas
  *  (6) Compare / repair all scrub maps
+ *  (7) Wait for digest updates to apply
  *
- * This logic is encoded in the very linear state machine:
+ * This logic is encoded in the mostly linear state machine:
  *
  *           +------------------+
  *  _________v__________        |
@@ -3782,6 +3783,12 @@ void PG::scrub(ThreadPool::TPHandle &handle)
  * |                    |   |   |
  * |    COMPARE_MAPS    |   |   |
  * |____________________|   |   |
+ *           |              |   |
+ *           |              |   |
+ *  _________v__________    |   |
+ * |                    |   |   |
+ * |WAIT_DIGEST_UPDATES |   |   |
+ * |____________________|   |   |
  *           |   |          |   |
  *           |   +----------+   |
  *  _________v__________        |
@@ -4002,8 +4009,21 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
         // requeue the writes from the chunk that just finished
         requeue_ops(waiting_for_active);
 
-        if (scrubber.end < hobject_t::get_max()) {
-          // schedule another leg of the scrub
+       scrubber.state = PG::Scrubber::WAIT_DIGEST_UPDATES;
+
+       // fall-thru
+
+      case PG::Scrubber::WAIT_DIGEST_UPDATES:
+       if (scrubber.num_digest_updates_pending) {
+         dout(10) << __func__ << " waiting on "
+                  << scrubber.num_digest_updates_pending
+                  << " digest updates" << dendl;
+         done = true;
+         break;
+       }
+
+       if (scrubber.end < hobject_t::get_max()) {
+         // schedule another leg of the scrub
           scrubber.start = scrubber.end;
 
           scrubber.state = PG::Scrubber::NEW_CHUNK;
@@ -4013,7 +4033,7 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
           scrubber.state = PG::Scrubber::FINISH;
         }
 
-        break;
+       break;
 
       case PG::Scrubber::FINISH:
         scrub_finish();
index b76178d65bcb3bfe9538b918f469247c20e9a3ca..6cc7aa472b21e352ac9bf5fbc56f9ac8b4e372b0 100644 (file)
@@ -1029,6 +1029,7 @@ public:
       waiting_on(0), shallow_errors(0), deep_errors(0), fixed(0),
       active_rep_scrub(0),
       must_scrub(false), must_deep_scrub(false), must_repair(false),
+      num_digest_updates_pending(0),
       state(INACTIVE),
       deep(false),
       seed(0)
@@ -1066,6 +1067,7 @@ public:
 
     // Objects who need digest updates
     map<hobject_t, pair<uint32_t,uint32_t> > missing_digest;
+    int num_digest_updates_pending;
 
     // chunky scrub
     hobject_t start, end;
@@ -1080,6 +1082,7 @@ public:
       BUILD_MAP,
       WAIT_REPLICAS,
       COMPARE_MAPS,
+      WAIT_DIGEST_UPDATES,
       FINISH,
     } state;
 
@@ -1112,6 +1115,7 @@ public:
         case BUILD_MAP: ret = "BUILD_MAP"; break;
         case WAIT_REPLICAS: ret = "WAIT_REPLICAS"; break;
         case COMPARE_MAPS: ret = "COMPARE_MAPS"; break;
+        case WAIT_DIGEST_UPDATES: ret = "WAIT_DIGEST_UPDATES"; break;
         case FINISH: ret = "FINISH"; break;
       }
       return ret;
@@ -1162,6 +1166,7 @@ public:
       missing.clear();
       authoritative.clear();
       missing_digest.clear();
+      num_digest_updates_pending = 0;
     }
 
   } scrubber;
index 9a6b90884939e6728b581816260c85d62dee8fb9..7c74eaa24348eb2bff3d4cd4816fcf4df6bd4ea1 100644 (file)
@@ -12290,6 +12290,22 @@ bool ReplicatedPG::_range_available_for_scrub(
   return true;
 }
 
+struct C_ScrubDigestUpdated : public Context {  // completion callback; _scrub() installs one as ctx->on_finish for each digest-update repop
+  ReplicatedPGRef pg;  // PG to notify; presumably a ref-counting handle that keeps the PG alive -- TODO confirm
+  C_ScrubDigestUpdated(ReplicatedPG *pg) : pg(pg) {}  // remembers the PG that submitted the update
+  void finish(int r) {  // r (the completion result) is not examined
+    pg->_scrub_digest_updated();  // report one finished digest update back to the PG
+  }
+};
+
+void ReplicatedPG::_scrub_digest_updated()  // invoked from C_ScrubDigestUpdated::finish(), once per finished digest update
+{
+  dout(20) << __func__ << dendl;
+  if (--scrubber.num_digest_updates_pending == 0) {  // one fewer outstanding update; NOTE(review): nothing guards against dropping below zero
+    osd->scrub_wq.queue(this);  // last update done: queue this PG on the scrub work queue; presumably this resumes chunky_scrub() from WAIT_DIGEST_UPDATES -- confirm
+  }
+}
+
 void ReplicatedPG::_scrub(ScrubMap& scrubmap)
 {
   dout(10) << "_scrub" << dendl;
@@ -12516,7 +12532,9 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap)
       ctx->new_obs.oi.set_data_digest(p->second.first);
       ctx->new_obs.oi.set_omap_digest(p->second.second);
       finish_ctx(ctx, pg_log_entry_t::MODIFY, true, true);
+      ctx->on_finish = new C_ScrubDigestUpdated(this);
       simple_repop_submit(repop);
+      ++scrubber.num_digest_updates_pending;
     }
   }
   
index 65f7773e5926d11794fd2768500c0c0ff4568d22..c6e3a39d36eabc8615e1611265abcbdb3944b302 100644 (file)
@@ -1281,9 +1281,11 @@ protected:
   virtual bool _range_available_for_scrub(
     const hobject_t &begin, const hobject_t &end);
   virtual void _scrub(ScrubMap& map);
+  void _scrub_digest_updated();
   virtual void _scrub_clear_state();
   virtual void _scrub_finish();
   object_stat_collection_t scrub_cstat;
+  friend class C_ScrubDigestUpdated;
 
   virtual void _split_into(pg_t child_pgid, PG *child, unsigned split_bits);
   void apply_and_flush_repops(bool requeue);