]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
src/osd/scrubber: process MapsCompared event syncronously 51498/head
authorSamuel Just <sjust@redhat.com>
Wed, 10 May 2023 22:56:49 +0000 (15:56 -0700)
committerSamuel Just <sjust@redhat.com>
Tue, 16 May 2023 14:40:44 +0000 (14:40 +0000)
Summary:

WaitReplica submits two asyncronous events through the OSD scheduler
concurrently: DigestUpdate and MapsCompared.  This causes the error
described in https://tracker.ceph.com/issues/59049 because MapsCompared
must be processed first, but they may be scheduled with differing
priorities depending on whether there are writes blocked.  This patch
avoids the problem by simply processing MapsCompared syncronously.

Details:

Within the WaitReplicas state handler for GotReplicas, we invoke
PGScrubber::maps_compare_n_cleanup:

sc::result WaitReplicas::react(const GotReplicas&)
{
  ...
      // maps_compare_n_cleanup() will arrange for MapsCompared event to be
      // sent:
      scrbr->maps_compare_n_cleanup();
      return discard_event();

void PgScrubber::maps_compare_n_cleanup()
{
  ...
  auto required_fixes =
    m_be->scrub_compare_maps(
      m_end.is_max(),
      m_listener->sl_get_snap_map_reader());
  ...
  m_osds->queue_scrub_maps_compared(m_pg, Scrub::scrub_prio_t::low_priority);

PgScrubber::maps_compare_n_cleanup arranges for the MapsCompared event
to be submitted through the OSD schdeduler to be delivered to the scrub
state machine later.  It also invokes scrub_compare_maps

objs_fix_list_t ScrubBackend::scrub_compare_maps(
  bool max_reached,
  SnapMapReaderI& snaps_getter)
{
  ...
  scrub_snapshot_metadata(for_meta_scrub);

which in turn invokes scrub_snapshot_metadata

void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map)
{
  ...
  m_scrubber.submit_digest_fixes(this_chunk->missing_digest);

which in turn invokes submit_digest_fixes

void PrimaryLogScrub::submit_digest_fixes(const digests_fixes_t& fixes)
{
  ...
  for (auto& [obj, dgs] : fixes) {
    ...
    PrimaryLogPG::OpContextUPtr ctx = m_pl_pg->simple_opc_create(obc);
    ...
    m_pl_pg->finish_ctx(ctx.get(), pg_log_entry_t::MODIFY);
    ...
    ctx->register_on_success([this]() {
      if ((num_digest_updates_pending >= 1) &&
  (--num_digest_updates_pending == 0)) {
m_osds->queue_scrub_digest_update(m_pl_pg,
  m_pl_pg->is_scrub_blocking_ops());
      }
    });

    m_pl_pg->simple_opc_submit(std::move(ctx));
  }
}

which submits a sequence of repops updating objects' digests and
schduling the DigestUpdate event once they have completed.

These two events, DigestUpdate and MapsCompared, are therefore in flight
concurrently.  However, only one event ordering is actually tolerated --
we must process MapsCompared first in order to be in state
WaitDigestUpdate when the DigestUpdate event arrives.  This would be
fine except that the priority at which we submit the DigestUpdate event
may end up with a higher priority if there is currently an op blocked.

There are two ways we could solve this.  We could ensure that the two
are always queued at the same priority and therefore will be processed
in order.  However, there's actually no reason to process the
MapsCompared event asyncronously -- it would be far less brittle to
simply process the MapsCompared syncronously and avoid this problem
entirely.

Fixes: https://tracker.ceph.com/issues/59049
Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.h
src/osd/scheduler/OpSchedulerItem.cc
src/osd/scheduler/OpSchedulerItem.h
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/pg_scrubber.h
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h
src/osd/scrubber_common.h

index 6ad11ed98921ede7733e7bc683939f0be3033dc1..25b760c499536b412c755724fc506aec0c594737 100644 (file)
@@ -1862,12 +1862,6 @@ void OSDService::queue_scrub_got_repl_maps(PG* pg, Scrub::scrub_prio_t with_prio
   queue_scrub_event_msg<PGScrubGotReplMaps>(pg, with_priority);
 }
 
-void OSDService::queue_scrub_maps_compared(PG* pg, Scrub::scrub_prio_t with_priority)
-{
-  // Resulting scrub event: 'MapsCompared'
-  queue_scrub_event_msg<PGScrubMapsCompared>(pg, with_priority);
-}
-
 void OSDService::queue_scrub_replica_pushes(PG *pg, Scrub::scrub_prio_t with_priority)
 {
   // Resulting scrub event: 'ReplicaPushesUpd'
index 9f8ac6ea7f6afb558f96479237751c8d98367153..5914f251a506273d818d141277bd650d9557402d 100644 (file)
@@ -559,10 +559,6 @@ public:
   /// Signals that there are more chunks to handle
   void queue_scrub_next_chunk(PG* pg, Scrub::scrub_prio_t with_priority);
 
-  /// Signals that we have finished comparing the maps for this chunk
-  /// Note: required, as in Crimson this operation is 'futurized'.
-  void queue_scrub_maps_compared(PG* pg, Scrub::scrub_prio_t with_priority);
-
   void queue_for_rep_scrub(PG* pg,
                           Scrub::scrub_prio_t with_high_priority,
                           unsigned int qu_priority,
index dc346b8be027c81201144c1deabff82490a402e3..217ee1ad2ef05f9a7051f65f0bfa263f894702cd 100644 (file)
@@ -503,11 +503,6 @@ public:
                        "ReplicaPushesUpd");
   }
 
-  void scrub_send_maps_compared(epoch_t queued, ThreadPool::TPHandle& handle)
-  {
-    forward_scrub_event(&ScrubPgIF::send_maps_compared, queued, "MapsCompared");
-  }
-
   void scrub_send_get_next_chunk(epoch_t queued, ThreadPool::TPHandle& handle)
   {
     forward_scrub_event(&ScrubPgIF::send_get_next_chunk, queued, "NextChunk");
index 755bf2d31757b1f8f5c978c537d7661386ff4eea..d1abc264a8f8c5a1f491e8cc646c488a0a59264c 100644 (file)
@@ -149,15 +149,6 @@ void PGScrubGotReplMaps::run(OSD* osd,
   pg->unlock();
 }
 
-void PGScrubMapsCompared::run(OSD* osd,
-                            OSDShard* sdata,
-                            PGRef& pg,
-                            ThreadPool::TPHandle& handle)
-{
-  pg->scrub_send_maps_compared(epoch_queued, handle);
-  pg->unlock();
-}
-
 void PGRepScrub::run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle)
 {
   pg->replica_scrub(epoch_queued, activation_index, handle);
index 3ae176e2b551bbde9f4fcc5166a798425a1ad13a..a9ec14de332c6af4575d38bd5d22374a49fcfa1d 100644 (file)
@@ -435,14 +435,6 @@ class PGScrubGotReplMaps : public PGScrubItem {
   void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final;
 };
 
-class PGScrubMapsCompared : public PGScrubItem {
- public:
-  PGScrubMapsCompared(spg_t pg, epoch_t epoch_queued)
-    : PGScrubItem{pg, epoch_queued, "PGScrubMapsCompared"}
-  {}
-  void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final;
-};
-
 class PGRepScrub : public PGScrubItem {
  public:
   PGRepScrub(spg_t pg, epoch_t epoch_queued, Scrub::act_token_t op_token)
index 56de19496ba03d886ee9265a897ad4aa6f82c9b2..327e9f2b2f87533515e45f2fe398c447dec9c35b 100644 (file)
@@ -421,16 +421,6 @@ void PgScrubber::send_scrub_is_finished(epoch_t epoch_queued)
   dout(10) << "scrubber event --<< " << __func__ << dendl;
 }
 
-void PgScrubber::send_maps_compared(epoch_t epoch_queued)
-{
-  dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued
-          << dendl;
-
-  m_fsm->process_event(Scrub::MapsCompared{});
-
-  dout(10) << "scrubber event --<< " << __func__ << dendl;
-}
-
 // -----------------
 
 bool PgScrubber::is_reserving() const
@@ -1377,7 +1367,6 @@ void PgScrubber::maps_compare_n_cleanup()
 
   // requeue the writes from the chunk that just finished
   requeue_waiting();
-  m_osds->queue_scrub_maps_compared(m_pg, Scrub::scrub_prio_t::low_priority);
 }
 
 Scrub::preemption_t& PgScrubber::get_preemptor()
index b44bf40e500ad01a6d8dbf81483249d2399ca791..42ac2b48807c414002f129555658f2a4dcb2ebad 100644 (file)
@@ -322,8 +322,6 @@ class PgScrubber : public ScrubPgIF,
 
   void send_local_map_done(epoch_t epoch_queued) final;
 
-  void send_maps_compared(epoch_t epoch_queued) final;
-
   void send_get_next_chunk(epoch_t epoch_queued) final;
 
   void send_scrub_is_finished(epoch_t epoch_queued) final;
index 9acb9c215e8ab96ffcf31894543c013a841615af..23fd08fef4a2c8a577a5d022ec71ccb5c011e8bb 100644 (file)
@@ -538,11 +538,8 @@ sc::result WaitReplicas::react(const GotReplicas&)
       return transit<PendingTimer>();
 
     } else {
-
-      // maps_compare_n_cleanup() will arrange for MapsCompared event to be
-      // sent:
       scrbr->maps_compare_n_cleanup();
-      return discard_event();
+      return transit<WaitDigestUpdate>();
     }
   } else {
     return discard_event();
index 7bea55c9235d5f27e141e6af303a8c8569878d1c..ffa6f4e6d3bf76366e306fb78ecfd4167f88e870 100644 (file)
@@ -112,9 +112,6 @@ MEV(IntLocalMapDone)
 /// scrub_snapshot_metadata()
 MEV(DigestUpdate)
 
-/// maps_compare_n_cleanup() transactions are done
-MEV(MapsCompared)
-
 /// event emitted when the replica grants a reservation to the primary
 MEV(ReplicaGrantReservation)
 
@@ -490,7 +487,6 @@ struct WaitReplicas : sc::state<WaitReplicas, ActiveScrubbing>, NamedSimply {
   using reactions = mpl::list<
     // all replicas are accounted for:
     sc::custom_reaction<GotReplicas>,
-    sc::transition<MapsCompared, WaitDigestUpdate>,
     sc::custom_reaction<DigestUpdate>>;
 
   sc::result react(const GotReplicas&);
index e1bd8c8eb0f6cf34218386f834c538248b5a7a97..945b77eb3fe1b6c0781edff4cb247715c13d18ca 100644 (file)
@@ -233,8 +233,6 @@ struct ScrubPgIF {
 
   virtual void send_scrub_is_finished(epoch_t epoch_queued) = 0;
 
-  virtual void send_maps_compared(epoch_t epoch_queued) = 0;
-
   virtual void on_applied_when_primary(const eversion_t& applied_version) = 0;
 
   // --------------------------------------------------