From 218de829b1affa8ed413572736fcc3314f65730b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 16 Dec 2014 16:39:35 -0800 Subject: [PATCH] osd: scrub: wait for digest updates to apply before next scrub chunk Wait for any digest updates to apply before we scrub the next chunk. This bounds the number of repops we initiate by the size of the scrub chunk, and it is generally nicer to the cluster. Signed-off-by: Sage Weil --- src/osd/PG.cc | 28 ++++++++++++++++++++++++---- src/osd/PG.h | 5 +++++ src/osd/ReplicatedPG.cc | 18 ++++++++++++++++++ src/osd/ReplicatedPG.h | 2 ++ 4 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index fd68039ad37c3..6a42e5f490585 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -3743,8 +3743,9 @@ void PG::scrub(ThreadPool::TPHandle &handle) * (4) Wait for writes to flush on the chunk * (5) Wait for maps from replicas * (6) Compare / repair all scrub maps + * (7) Wait for digest updates to apply * - * This logic is encoded in the very linear state machine: + * This logic is encoded in the mostly linear state machine: * * +------------------+ * _________v__________ | @@ -3782,6 +3783,12 @@ void PG::scrub(ThreadPool::TPHandle &handle) * | | | | * | COMPARE_MAPS | | | * |____________________| | | + * | | | + * | | | + * _________v__________ | | + * | | | | + * |WAIT_DIGEST_UPDATES | | | + * |____________________| | | * | | | | * | +----------+ | * _________v__________ | @@ -4002,8 +4009,21 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle) // requeue the writes from the chunk that just finished requeue_ops(waiting_for_active); - if (scrubber.end < hobject_t::get_max()) { - // schedule another leg of the scrub + scrubber.state = PG::Scrubber::WAIT_DIGEST_UPDATES; + + // fall-thru + + case PG::Scrubber::WAIT_DIGEST_UPDATES: + if (scrubber.num_digest_updates_pending) { + dout(10) << __func__ << " waiting on " + << scrubber.num_digest_updates_pending + << " digest updates" << dendl; + done = true; + 
break; + } + + if (scrubber.end < hobject_t::get_max()) { + // schedule another leg of the scrub scrubber.start = scrubber.end; scrubber.state = PG::Scrubber::NEW_CHUNK; @@ -4013,7 +4033,7 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle) scrubber.state = PG::Scrubber::FINISH; } - break; + break; case PG::Scrubber::FINISH: scrub_finish(); diff --git a/src/osd/PG.h b/src/osd/PG.h index b76178d65bcb3..6cc7aa472b21e 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1029,6 +1029,7 @@ public: waiting_on(0), shallow_errors(0), deep_errors(0), fixed(0), active_rep_scrub(0), must_scrub(false), must_deep_scrub(false), must_repair(false), + num_digest_updates_pending(0), state(INACTIVE), deep(false), seed(0) @@ -1066,6 +1067,7 @@ public: // Objects who need digest updates map > missing_digest; + int num_digest_updates_pending; // chunky scrub hobject_t start, end; @@ -1080,6 +1082,7 @@ public: BUILD_MAP, WAIT_REPLICAS, COMPARE_MAPS, + WAIT_DIGEST_UPDATES, FINISH, } state; @@ -1112,6 +1115,7 @@ public: case BUILD_MAP: ret = "BUILD_MAP"; break; case WAIT_REPLICAS: ret = "WAIT_REPLICAS"; break; case COMPARE_MAPS: ret = "COMPARE_MAPS"; break; + case WAIT_DIGEST_UPDATES: ret = "WAIT_DIGEST_UPDATES"; break; case FINISH: ret = "FINISH"; break; } return ret; @@ -1162,6 +1166,7 @@ public: missing.clear(); authoritative.clear(); missing_digest.clear(); + num_digest_updates_pending = 0; } } scrubber; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 9a6b90884939e..7c74eaa24348e 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -12290,6 +12290,22 @@ bool ReplicatedPG::_range_available_for_scrub( return true; } +struct C_ScrubDigestUpdated : public Context { + ReplicatedPGRef pg; + C_ScrubDigestUpdated(ReplicatedPG *pg) : pg(pg) {} + void finish(int r) { + pg->_scrub_digest_updated(); + } +}; + +void ReplicatedPG::_scrub_digest_updated() +{ + dout(20) << __func__ << dendl; + if (--scrubber.num_digest_updates_pending == 0) { + 
osd->scrub_wq.queue(this); + } +} + void ReplicatedPG::_scrub(ScrubMap& scrubmap) { dout(10) << "_scrub" << dendl; @@ -12516,7 +12532,9 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) ctx->new_obs.oi.set_data_digest(p->second.first); ctx->new_obs.oi.set_omap_digest(p->second.second); finish_ctx(ctx, pg_log_entry_t::MODIFY, true, true); + ctx->on_finish = new C_ScrubDigestUpdated(this); simple_repop_submit(repop); + ++scrubber.num_digest_updates_pending; } } diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 65f7773e5926d..c6e3a39d36eab 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -1281,9 +1281,11 @@ protected: virtual bool _range_available_for_scrub( const hobject_t &begin, const hobject_t &end); virtual void _scrub(ScrubMap& map); + void _scrub_digest_updated(); virtual void _scrub_clear_state(); virtual void _scrub_finish(); object_stat_collection_t scrub_cstat; + friend class C_ScrubDigestUpdated; virtual void _split_into(pg_t child_pgid, PG *child, unsigned split_bits); void apply_and_flush_repops(bool requeue); -- 2.39.5