From: Samuel Just Date: Wed, 10 May 2023 22:56:49 +0000 (-0700) Subject: src/osd/scrubber: process MapsCompared event syncronously X-Git-Tag: v18.1.0~22^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=7ebdc4b5b54a65789a46e9c00c55a49330d0e1bb;p=ceph-ci.git src/osd/scrubber: process MapsCompared event syncronously Summary: WaitReplica submits two asyncronous events through the OSD scheduler concurrently: DigestUpdate and MapsCompared. This causes the error described in https://tracker.ceph.com/issues/59049 because MapsCompared must be processed first, but they may be scheduled with differing priorities depending on whether there are writes blocked. This patch avoids the problem by simply processing MapsCompared syncronously. Details: Within the WaitReplicas state handler for GotReplicas, we invoke PGScrubber::maps_compare_n_cleanup: sc::result WaitReplicas::react(const GotReplicas&) { ... // maps_compare_n_cleanup() will arrange for MapsCompared event to be // sent: scrbr->maps_compare_n_cleanup(); return discard_event(); void PgScrubber::maps_compare_n_cleanup() { ... auto required_fixes = m_be->scrub_compare_maps( m_end.is_max(), m_listener->sl_get_snap_map_reader()); ... m_osds->queue_scrub_maps_compared(m_pg, Scrub::scrub_prio_t::low_priority); PgScrubber::maps_compare_n_cleanup arranges for the MapsCompared event to be submitted through the OSD schdeduler to be delivered to the scrub state machine later. It also invokes scrub_compare_maps objs_fix_list_t ScrubBackend::scrub_compare_maps( bool max_reached, SnapMapReaderI& snaps_getter) { ... scrub_snapshot_metadata(for_meta_scrub); which in turn invokes scrub_snapshot_metadata void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) { ... m_scrubber.submit_digest_fixes(this_chunk->missing_digest); which in turn invokes submit_digest_fixes void PrimaryLogScrub::submit_digest_fixes(const digests_fixes_t& fixes) { ... for (auto& [obj, dgs] : fixes) { ... PrimaryLogPG::OpContextUPtr ctx = m_pl_pg->simple_opc_create(obc); ... m_pl_pg->finish_ctx(ctx.get(), pg_log_entry_t::MODIFY); ... ctx->register_on_success([this]() { if ((num_digest_updates_pending >= 1) && (--num_digest_updates_pending == 0)) { m_osds->queue_scrub_digest_update(m_pl_pg, m_pl_pg->is_scrub_blocking_ops()); } }); m_pl_pg->simple_opc_submit(std::move(ctx)); } } which submits a sequence of repops updating objects' digests and schduling the DigestUpdate event once they have completed. These two events, DigestUpdate and MapsCompared, are therefore in flight concurrently. However, only one event ordering is actually tolerated -- we must process MapsCompared first in order to be in state WaitDigestUpdate when the DigestUpdate event arrives. This would be fine except that the priority at which we submit the DigestUpdate event may end up with a higher priority if there is currently an op blocked. There are two ways we could solve this. We could ensure that the two are always queued at the same priority and therefore will be processed in order. However, there's actually no reason to process the MapsCompared event asyncronously -- it would be far less brittle to simply process the MapsCompared syncronously and avoid this problem entirely. Fixes: https://tracker.ceph.com/issues/59049 Signed-off-by: Samuel Just (cherry picked from commit 604860e54098b20ea65785dbcf7553dd50b22532) --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index c2404ce793b..d826b1e169a 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1862,12 +1862,6 @@ void OSDService::queue_scrub_got_repl_maps(PG* pg, Scrub::scrub_prio_t with_prio queue_scrub_event_msg(pg, with_priority); } -void OSDService::queue_scrub_maps_compared(PG* pg, Scrub::scrub_prio_t with_priority) -{ - // Resulting scrub event: 'MapsCompared' - queue_scrub_event_msg(pg, with_priority); -} - void OSDService::queue_scrub_replica_pushes(PG *pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'ReplicaPushesUpd' diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 9f8ac6ea7f6..5914f251a50 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -559,10 +559,6 @@ public: /// Signals that there are more chunks to handle void queue_scrub_next_chunk(PG* pg, Scrub::scrub_prio_t with_priority); - /// Signals that we have finished comparing the maps for this chunk - /// Note: required, as in Crimson this operation is 'futurized'. - void queue_scrub_maps_compared(PG* pg, Scrub::scrub_prio_t with_priority); - void queue_for_rep_scrub(PG* pg, Scrub::scrub_prio_t with_high_priority, unsigned int qu_priority, diff --git a/src/osd/PG.h b/src/osd/PG.h index 42b485e24bb..88c893b350d 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -501,11 +501,6 @@ public: "ReplicaPushesUpd"); } - void scrub_send_maps_compared(epoch_t queued, ThreadPool::TPHandle& handle) - { - forward_scrub_event(&ScrubPgIF::send_maps_compared, queued, "MapsCompared"); - } - void scrub_send_get_next_chunk(epoch_t queued, ThreadPool::TPHandle& handle) { forward_scrub_event(&ScrubPgIF::send_get_next_chunk, queued, "NextChunk"); diff --git a/src/osd/scheduler/OpSchedulerItem.cc b/src/osd/scheduler/OpSchedulerItem.cc index 755bf2d3175..d1abc264a8f 100644 --- a/src/osd/scheduler/OpSchedulerItem.cc +++ b/src/osd/scheduler/OpSchedulerItem.cc @@ -149,15 +149,6 @@ void PGScrubGotReplMaps::run(OSD* osd, pg->unlock(); } -void PGScrubMapsCompared::run(OSD* osd, - OSDShard* sdata, - PGRef& pg, - ThreadPool::TPHandle& handle) -{ - pg->scrub_send_maps_compared(epoch_queued, handle); - pg->unlock(); -} - void PGRepScrub::run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) { pg->replica_scrub(epoch_queued, activation_index, handle); diff --git a/src/osd/scheduler/OpSchedulerItem.h b/src/osd/scheduler/OpSchedulerItem.h index 3ae176e2b55..a9ec14de332 100644 --- a/src/osd/scheduler/OpSchedulerItem.h +++ b/src/osd/scheduler/OpSchedulerItem.h @@ -435,14 +435,6 @@ class PGScrubGotReplMaps : public PGScrubItem { void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final; }; -class PGScrubMapsCompared : public PGScrubItem { - public: - PGScrubMapsCompared(spg_t pg, epoch_t epoch_queued) - : PGScrubItem{pg, epoch_queued, "PGScrubMapsCompared"} - {} - void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final; -}; - class PGRepScrub : public PGScrubItem { public: PGRepScrub(spg_t pg, epoch_t epoch_queued, Scrub::act_token_t op_token) diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 6faf065d20e..4cb4ef8daf3 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -408,16 +408,6 @@ void PgScrubber::send_scrub_is_finished(epoch_t epoch_queued) dout(10) << "scrubber event --<< " << __func__ << dendl; } -void PgScrubber::send_maps_compared(epoch_t epoch_queued) -{ - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued - << dendl; - - m_fsm->process_event(Scrub::MapsCompared{}); - - dout(10) << "scrubber event --<< " << __func__ << dendl; -} - // ----------------- bool PgScrubber::is_reserving() const @@ -1413,7 +1403,6 @@ void PgScrubber::maps_compare_n_cleanup() // requeue the writes from the chunk that just finished requeue_waiting(); - m_osds->queue_scrub_maps_compared(m_pg, Scrub::scrub_prio_t::low_priority); } Scrub::preemption_t& PgScrubber::get_preemptor() diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 77659377a4e..10e08b72e49 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -371,8 +371,6 @@ class PgScrubber : public ScrubPgIF, void send_local_map_done(epoch_t epoch_queued) final; - void send_maps_compared(epoch_t epoch_queued) final; - void send_get_next_chunk(epoch_t epoch_queued) final; void send_scrub_is_finished(epoch_t epoch_queued) final; diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index d63c50fafc6..c372c7ede9e 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -451,11 +451,8 @@ sc::result WaitReplicas::react(const GotReplicas&) return transit(); } else { - - // maps_compare_n_cleanup() will arrange for MapsCompared event to be - // sent: scrbr->maps_compare_n_cleanup(); - return discard_event(); + return transit(); } } else { return discard_event(); diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index 3303124f0cb..038668fb264 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -105,9 +105,6 @@ MEV(IntLocalMapDone) /// scrub_snapshot_metadata() MEV(DigestUpdate) -/// maps_compare_n_cleanup() transactions are done -MEV(MapsCompared) - /// initiating replica scrub MEV(StartReplica) @@ -337,7 +334,6 @@ struct WaitReplicas : sc::state, NamedSimply { using reactions = mpl::list< // all replicas are accounted for: sc::custom_reaction, - sc::transition, sc::custom_reaction>; sc::result react(const GotReplicas&); diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index b50b280b08e..a15ea65969f 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -235,8 +235,6 @@ struct ScrubPgIF { virtual void send_scrub_is_finished(epoch_t epoch_queued) = 0; - virtual void send_maps_compared(epoch_t epoch_queued) = 0; - virtual void on_applied_when_primary(const eversion_t& applied_version) = 0; // --------------------------------------------------