From: Mike Ryan Date: Tue, 26 Jun 2012 23:25:27 +0000 (-0700) Subject: pg: reduce scrub write lock window X-Git-Tag: v0.49~73 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ecd7ffe7ce6aeab7b03e9c4e8eff26e4fd18ed64;p=ceph.git pg: reduce scrub write lock window Wait for all replicas to construct the base scrub map before finalizing the scrub and locking out writes. Signed-off-by: Mike Ryan --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index d3c670aa77dc..132c9266e3bc 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2630,9 +2630,19 @@ void PG::sub_op_scrub_map(OpRequestRef op) scrub_received_maps[from].decode(p, info.pgid.pool()); } - if (--scrub_waiting_on == 0) { - assert(last_update_applied == info.last_update); - osd->scrub_finalize_wq.queue(this); + --scrub_waiting_on; + if (scrub_waiting_on == 0) { + if (finalizing_scrub) { // incremental lists received + osd->scrub_finalize_wq.queue(this); + } else { // initial lists received + scrub_block_writes = true; + if (last_update_applied == info.last_update) { + finalizing_scrub = true; + scrub_gather_replica_maps(); + ++scrub_waiting_on; + osd->scrub_wq.queue(this); + } + } } } @@ -2965,8 +2975,9 @@ void PG::replica_scrub(MOSDRepScrub *msg) * PG_STATE_SCRUBBING is set when the scrub is queued * * Once the initial scrub has completed and the requests have gone out to - * replicas for maps, finalizing_scrub is set. scrub_waiting_on is set to - * the number of maps outstanding (active.size()). + * replicas for maps, we set scrub_active and wait for the replicas to + * complete their maps. Once the maps are received, scrub_block_writes is set. + * scrub_waiting_on is set to the number of maps outstanding (active.size()). * * If last_update_applied is behind the head of the log, scrub returns to be * requeued by op_applied. @@ -2998,8 +3009,10 @@ void PG::scrub() return; } - if (!finalizing_scrub) { + if (!scrub_active) { dout(10) << "scrub start" << dendl; + scrub_active = true; + update_stats(); scrub_received_maps.clear(); scrub_epoch_start = info.history.same_interval_since; @@ -3037,18 +3050,35 @@ void PG::scrub() return; } - finalizing_scrub = true; + --scrub_waiting_on; + + if (scrub_waiting_on == 0) { + // the replicas have completed their scrub map, so lock out writes + scrub_block_writes = true; + } else { + dout(10) << "wait for replicas to build initial scrub map" << dendl; + unlock(); + return; + } + if (last_update_applied != info.last_update) { dout(10) << "wait for cleanup" << dendl; unlock(); return; } + + // fall through if last_update_applied == info.last_update and scrub_waiting_on == 0 + + // request incrementals from replicas + scrub_gather_replica_maps(); + ++scrub_waiting_on; } - dout(10) << "clean up scrub" << dendl; assert(last_update_applied == info.last_update); - + + finalizing_scrub = true; + if (scrub_epoch_start != info.history.same_interval_since) { dout(10) << "scrub pg changed, aborting" << dendl; scrub_clear_state(); @@ -3085,6 +3115,8 @@ void PG::scrub_clear_state() osd->requeue_ops(this, waiting_for_active); finalizing_scrub = false; + scrub_block_writes = false; + scrub_active = false; if (active_rep_scrub) { active_rep_scrub->put(); active_rep_scrub = NULL; diff --git a/src/osd/PG.h b/src/osd/PG.h index fa4be911298a..e7837a4f545f 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -736,7 +736,9 @@ public: // -- scrub -- set scrub_reserved_peers; map scrub_received_maps; - bool finalizing_scrub; + bool finalizing_scrub; + bool scrub_block_writes; + bool scrub_active; bool scrub_reserved, scrub_reserve_failed; int scrub_waiting_on; epoch_t scrub_epoch_start; @@ -1243,6 +1245,8 @@ public: osr(stringify(p)), finish_sync_event(NULL), finalizing_scrub(false), + scrub_block_writes(false), + scrub_active(false), scrub_reserved(false), scrub_reserve_failed(false), scrub_waiting_on(0), active_rep_scrub(0), diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index a2a0efcdce98..3ae32eb407fd 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -614,7 +614,7 @@ void ReplicatedPG::do_op(OpRequestRef op) dout(10) << "do_op " << *m << (m->may_write() ? " may_write" : "") << dendl; - if (finalizing_scrub && m->may_write()) { + if (scrub_block_writes && m->may_write()) { dout(20) << __func__ << ": waiting for scrub" << dendl; waiting_for_active.push_back(op); op->mark_delayed(); @@ -1410,7 +1410,7 @@ bool ReplicatedPG::snap_trimmer() put(); return true; } - if (!finalizing_scrub) { + if (!scrub_block_writes) { dout(10) << "snap_trimmer posting" << dendl; snap_trimmer_machine.process_event(SnapTrim()); } @@ -3408,8 +3408,11 @@ void ReplicatedPG::op_applied(RepGather *repop) assert(info.last_update >= repop->v); assert(last_update_applied < repop->v); last_update_applied = repop->v; - if (last_update_applied == info.last_update && finalizing_scrub) { + if (last_update_applied == info.last_update && scrub_block_writes) { dout(10) << "requeueing scrub for cleanup" << dendl; + finalizing_scrub = true; + scrub_gather_replica_maps(); + ++scrub_waiting_on; osd->scrub_wq.queue(this); } @@ -5681,7 +5684,7 @@ void ReplicatedPG::on_change() clear_scrub_reserved(); // clear scrub state - if (finalizing_scrub) { + if (scrub_block_writes) { scrub_clear_state(); } else if (is_scrubbing()) { state_clear(PG_STATE_SCRUBBING); @@ -6631,7 +6634,7 @@ boost::statechart::result ReplicatedPG::NotTrimming::react(const SnapTrim&) } else if (!pg->is_primary() || !pg->is_active() || !pg->is_clean()) { dout(10) << "NotTrimming not primary, active, clean" << dendl; return discard_event(); - } else if (pg->finalizing_scrub) { + } else if (pg->scrub_block_writes) { dout(10) << "NotTrimming finalizing scrub" << dendl; pg->queue_snap_trim(); return discard_event();