]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
src/osd/scrubber: process MapsCompared event syncronously
authorSamuel Just <sjust@redhat.com>
Wed, 10 May 2023 22:56:49 +0000 (15:56 -0700)
committerSamuel Just <sjust@redhat.com>
Mon, 22 May 2023 20:20:44 +0000 (13:20 -0700)
Summary:

WaitReplica submits two asyncronous events through the OSD scheduler
concurrently: DigestUpdate and MapsCompared.  This causes the error
described in https://tracker.ceph.com/issues/59049 because MapsCompared
must be processed first, but they may be scheduled with differing
priorities depending on whether there are writes blocked.  This patch
avoids the problem by simply processing MapsCompared syncronously.

Details:

Within the WaitReplicas state handler for GotReplicas, we invoke
PGScrubber::maps_compare_n_cleanup:

sc::result WaitReplicas::react(const GotReplicas&)
{
  ...
      // maps_compare_n_cleanup() will arrange for MapsCompared event to be
      // sent:
      scrbr->maps_compare_n_cleanup();
      return discard_event();

void PgScrubber::maps_compare_n_cleanup()
{
  ...
  auto required_fixes =
    m_be->scrub_compare_maps(
      m_end.is_max(),
      m_listener->sl_get_snap_map_reader());
  ...
  m_osds->queue_scrub_maps_compared(m_pg, Scrub::scrub_prio_t::low_priority);

PgScrubber::maps_compare_n_cleanup arranges for the MapsCompared event
to be submitted through the OSD schdeduler to be delivered to the scrub
state machine later.  It also invokes scrub_compare_maps

objs_fix_list_t ScrubBackend::scrub_compare_maps(
  bool max_reached,
  SnapMapReaderI& snaps_getter)
{
  ...
  scrub_snapshot_metadata(for_meta_scrub);

which in turn invokes scrub_snapshot_metadata

void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map)
{
  ...
  m_scrubber.submit_digest_fixes(this_chunk->missing_digest);

which in turn invokes submit_digest_fixes

void PrimaryLogScrub::submit_digest_fixes(const digests_fixes_t& fixes)
{
  ...
  for (auto& [obj, dgs] : fixes) {
    ...
    PrimaryLogPG::OpContextUPtr ctx = m_pl_pg->simple_opc_create(obc);
    ...
    m_pl_pg->finish_ctx(ctx.get(), pg_log_entry_t::MODIFY);
    ...
    ctx->register_on_success([this]() {
      if ((num_digest_updates_pending >= 1) &&
  (--num_digest_updates_pending == 0)) {
m_osds->queue_scrub_digest_update(m_pl_pg,
  m_pl_pg->is_scrub_blocking_ops());
      }
    });

    m_pl_pg->simple_opc_submit(std::move(ctx));
  }
}

which submits a sequence of repops updating objects' digests and
schduling the DigestUpdate event once they have completed.

These two events, DigestUpdate and MapsCompared, are therefore in flight
concurrently.  However, only one event ordering is actually tolerated --
we must process MapsCompared first in order to be in state
WaitDigestUpdate when the DigestUpdate event arrives.  This would be
fine except that the priority at which we submit the DigestUpdate event
may end up with a higher priority if there is currently an op blocked.

There are two ways we could solve this.  We could ensure that the two
are always queued at the same priority and therefore will be processed
in order.  However, there's actually no reason to process the
MapsCompared event asyncronously -- it would be far less brittle to
simply process the MapsCompared syncronously and avoid this problem
entirely.

Fixes: https://tracker.ceph.com/issues/59049
Signed-off-by: Samuel Just <sjust@redhat.com>
(cherry picked from commit 604860e54098b20ea65785dbcf7553dd50b22532)

src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.h
src/osd/scheduler/OpSchedulerItem.cc
src/osd/scheduler/OpSchedulerItem.h
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/pg_scrubber.h
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h
src/osd/scrubber_common.h

index c2404ce793b38256d2625660629bd08df7553d14..d826b1e169a370b4e7e41562e46a471782458ae3 100644 (file)
@@ -1862,12 +1862,6 @@ void OSDService::queue_scrub_got_repl_maps(PG* pg, Scrub::scrub_prio_t with_prio
   queue_scrub_event_msg<PGScrubGotReplMaps>(pg, with_priority);
 }
 
-void OSDService::queue_scrub_maps_compared(PG* pg, Scrub::scrub_prio_t with_priority)
-{
-  // Resulting scrub event: 'MapsCompared'
-  queue_scrub_event_msg<PGScrubMapsCompared>(pg, with_priority);
-}
-
 void OSDService::queue_scrub_replica_pushes(PG *pg, Scrub::scrub_prio_t with_priority)
 {
   // Resulting scrub event: 'ReplicaPushesUpd'
index 9f8ac6ea7f6afb558f96479237751c8d98367153..5914f251a506273d818d141277bd650d9557402d 100644 (file)
@@ -559,10 +559,6 @@ public:
   /// Signals that there are more chunks to handle
   void queue_scrub_next_chunk(PG* pg, Scrub::scrub_prio_t with_priority);
 
-  /// Signals that we have finished comparing the maps for this chunk
-  /// Note: required, as in Crimson this operation is 'futurized'.
-  void queue_scrub_maps_compared(PG* pg, Scrub::scrub_prio_t with_priority);
-
   void queue_for_rep_scrub(PG* pg,
                           Scrub::scrub_prio_t with_high_priority,
                           unsigned int qu_priority,
index 42b485e24bb2aed5d02ab63cb5025e2d48251832..88c893b350db393736937ba6764b1ef181e7479c 100644 (file)
@@ -501,11 +501,6 @@ public:
                        "ReplicaPushesUpd");
   }
 
-  void scrub_send_maps_compared(epoch_t queued, ThreadPool::TPHandle& handle)
-  {
-    forward_scrub_event(&ScrubPgIF::send_maps_compared, queued, "MapsCompared");
-  }
-
   void scrub_send_get_next_chunk(epoch_t queued, ThreadPool::TPHandle& handle)
   {
     forward_scrub_event(&ScrubPgIF::send_get_next_chunk, queued, "NextChunk");
index 755bf2d31757b1f8f5c978c537d7661386ff4eea..d1abc264a8f8c5a1f491e8cc646c488a0a59264c 100644 (file)
@@ -149,15 +149,6 @@ void PGScrubGotReplMaps::run(OSD* osd,
   pg->unlock();
 }
 
-void PGScrubMapsCompared::run(OSD* osd,
-                            OSDShard* sdata,
-                            PGRef& pg,
-                            ThreadPool::TPHandle& handle)
-{
-  pg->scrub_send_maps_compared(epoch_queued, handle);
-  pg->unlock();
-}
-
 void PGRepScrub::run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle)
 {
   pg->replica_scrub(epoch_queued, activation_index, handle);
index 3ae176e2b551bbde9f4fcc5166a798425a1ad13a..a9ec14de332c6af4575d38bd5d22374a49fcfa1d 100644 (file)
@@ -435,14 +435,6 @@ class PGScrubGotReplMaps : public PGScrubItem {
   void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final;
 };
 
-class PGScrubMapsCompared : public PGScrubItem {
- public:
-  PGScrubMapsCompared(spg_t pg, epoch_t epoch_queued)
-    : PGScrubItem{pg, epoch_queued, "PGScrubMapsCompared"}
-  {}
-  void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final;
-};
-
 class PGRepScrub : public PGScrubItem {
  public:
   PGRepScrub(spg_t pg, epoch_t epoch_queued, Scrub::act_token_t op_token)
index 6faf065d20e1cf3ab59af2f32b2af80d8b92c124..4cb4ef8daf3a6687cd3cd2b08df3b5f9973505e7 100644 (file)
@@ -408,16 +408,6 @@ void PgScrubber::send_scrub_is_finished(epoch_t epoch_queued)
   dout(10) << "scrubber event --<< " << __func__ << dendl;
 }
 
-void PgScrubber::send_maps_compared(epoch_t epoch_queued)
-{
-  dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued
-          << dendl;
-
-  m_fsm->process_event(Scrub::MapsCompared{});
-
-  dout(10) << "scrubber event --<< " << __func__ << dendl;
-}
-
 // -----------------
 
 bool PgScrubber::is_reserving() const
@@ -1413,7 +1403,6 @@ void PgScrubber::maps_compare_n_cleanup()
 
   // requeue the writes from the chunk that just finished
   requeue_waiting();
-  m_osds->queue_scrub_maps_compared(m_pg, Scrub::scrub_prio_t::low_priority);
 }
 
 Scrub::preemption_t& PgScrubber::get_preemptor()
index 77659377a4e32a84cd1172cecd5169ab9d9cccce..10e08b72e49dc4525eada20c937f42d59926f675 100644 (file)
@@ -371,8 +371,6 @@ class PgScrubber : public ScrubPgIF,
 
   void send_local_map_done(epoch_t epoch_queued) final;
 
-  void send_maps_compared(epoch_t epoch_queued) final;
-
   void send_get_next_chunk(epoch_t epoch_queued) final;
 
   void send_scrub_is_finished(epoch_t epoch_queued) final;
index d63c50fafc6769e7440d90ab2cd7c9b857c684b7..c372c7ede9e587decbdf03b16c59d6472e765e5c 100644 (file)
@@ -451,11 +451,8 @@ sc::result WaitReplicas::react(const GotReplicas&)
       return transit<PendingTimer>();
 
     } else {
-
-      // maps_compare_n_cleanup() will arrange for MapsCompared event to be
-      // sent:
       scrbr->maps_compare_n_cleanup();
-      return discard_event();
+      return transit<WaitDigestUpdate>();
     }
   } else {
     return discard_event();
index 3303124f0cb6e1c7ff7156a5de8bd63a451d58ae..038668fb264ca3988c1eb7541dae4d83069a4242 100644 (file)
@@ -105,9 +105,6 @@ MEV(IntLocalMapDone)
 /// scrub_snapshot_metadata()
 MEV(DigestUpdate)
 
-/// maps_compare_n_cleanup() transactions are done
-MEV(MapsCompared)
-
 /// initiating replica scrub
 MEV(StartReplica)
 
@@ -337,7 +334,6 @@ struct WaitReplicas : sc::state<WaitReplicas, ActiveScrubbing>, NamedSimply {
   using reactions = mpl::list<
     // all replicas are accounted for:
     sc::custom_reaction<GotReplicas>,
-    sc::transition<MapsCompared, WaitDigestUpdate>,
     sc::custom_reaction<DigestUpdate>>;
 
   sc::result react(const GotReplicas&);
index b50b280b08e4fe87808ee4305fe1e737bbf36044..a15ea65969f34d4c1e4e6653de91265395ef0dcf 100644 (file)
@@ -235,8 +235,6 @@ struct ScrubPgIF {
 
   virtual void send_scrub_is_finished(epoch_t epoch_queued) = 0;
 
-  virtual void send_maps_compared(epoch_t epoch_queued) = 0;
-
   virtual void on_applied_when_primary(const eversion_t& applied_version) = 0;
 
   // --------------------------------------------------