]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/.../snaptrim_event: remove pipeline stages located on event
authorSamuel Just <sjust@redhat.com>
Tue, 28 May 2024 16:48:42 +0000 (09:48 -0700)
committerSamuel Just <sjust@redhat.com>
Thu, 13 Jun 2024 18:44:34 +0000 (11:44 -0700)
WaitSubop, WaitTrimTimer, and WaitRepop are pipeline stages local to
the operation.  As such they don't actually provide any ordering
guarantees as only one operation will ever enter them.  Rather, the
intent is to hook into the event system to expose information to an
administrator.

This poses a problem for OrderedConcurrentPhase as it is currently
implemented.  PipelineHandle::exit() is invoked prior to the op being
destructed.  PipelineHandle::exit() does:

  void exit() {
    barrier.reset();
  }

For OrderedConcurrentPhase, ~ExitBarrier() invokes ExitBarrier::exit():

    void exit() final {
      if (barrier) {
        assert(phase);
        assert(phase->core == seastar::this_shard_id());
        std::ignore = std::move(*barrier
        ).then([phase=this->phase] {
          phase->mutex.unlock();
        });
        barrier = std::nullopt;
        phase = nullptr;
      } else if (phase) {
        assert(phase->core == seastar::this_shard_id());
        phase->mutex.unlock();
        phase = nullptr;
      }
    }

The problem comes in not waiting for the phase->mutex.unlock() to occur.
For SnapTrimEvent, phase is actually in the operation itself.  It's
possible for that continuation

        ).then([phase=this->phase] {
          phase->mutex.unlock();

to occur after the last finally() in ShardServices::start_operation
completes and releases the final reference to SnapTrimEvent.  This is
harmless normally provided that the PG or connection outlives it,
but it's a problem for these stages.

For now, let's just remove these stages.  We can reintroduce another
mechanism later to set these event flags without an actual pipeline
stage.

This is likely a bug even with pipelines not embedded in an operation,
but we can fix it later -- https://tracker.ceph.com/issues/64545.

Fixes: https://tracker.ceph.com/issues/63647
Signed-off-by: Samuel Just <sjust@redhat.com>
src/crimson/osd/osd_operations/snaptrim_event.cc
src/crimson/osd/osd_operations/snaptrim_event.h

index fabf2b0eb7aa4bc89ff0427249c5367ba2ccde87..99fe42d9a822dce7c9d1f5b71528844e6e7c0177 100644 (file)
@@ -72,6 +72,8 @@ SnapTrimEvent::start()
     handle.exit();
   });
 
+  /* TODO: add a way to expose progress via the optracker without misusing
+   * pipeline stages. https://tracker.ceph.com/issues/66473 */
   ShardServices &shard_services = pg->get_shard_services();
   co_await enter_stage<interruptor>(client_pp().wait_for_active);
 
@@ -124,18 +126,11 @@ SnapTrimEvent::start()
          snapid));
     }
 
-    co_await enter_stage<interruptor>(wait_subop);
-
     logger().debug("{}: awaiting completion", *this);
     co_await subop_blocker.interruptible_wait_completion();
   }
 
   if (needs_pause) {
-    // let's know operators we're waiting
-    co_await enter_stage<interruptor>(
-      wait_trim_timer
-    );
-
     using crimson::common::local_conf;
     const auto time_to_sleep =
       local_conf().template get_val<double>("osd_snap_trim_sleep");
@@ -452,11 +447,7 @@ SnapTrimObjSubEvent::start()
              std::move(log_entries));
            return submitted.then_interruptible(
              [all_completed=std::move(all_completed), this] () mutable {
-               return enter_stage<interruptor>(
-                 wait_repop
-               ).then_interruptible([all_completed=std::move(all_completed)] () mutable {
-                 return std::move(all_completed);
-               });
+               return std::move(all_completed);
              });
          });
        });
index 759e000ff27e6a566323284bf15b4c4f12ff1ab6..6d5df1cca5d3823aeeb4c445764936a38e85eece 100644 (file)
@@ -61,19 +61,6 @@ private:
 
   SubOpBlocker<snap_trim_obj_subevent_ret_t> subop_blocker;
 
-  // we don't need to synchronize with other instances of SnapTrimEvent;
-  // it's here for the sake of op tracking.
-  struct WaitSubop : OrderedConcurrentPhaseT<WaitSubop> {
-    static constexpr auto type_name = "SnapTrimEvent::wait_subop";
-  } wait_subop;
-
-  // an instantiator can instruct us to go over this stage and then
-  // wait for the future to implement throttling. It is implemented
-  // that way to for the sake of tracking ops.
-  struct WaitTrimTimer : OrderedExclusivePhaseT<WaitTrimTimer> {
-    static constexpr auto type_name = "SnapTrimEvent::wait_trim_timer";
-  } wait_trim_timer;
-
   Ref<PG> pg;
   PipelineHandle handle;
   SnapMapper& snap_mapper;
@@ -89,9 +76,7 @@ public:
     PGActivationBlocker::BlockingEvent,
     CommonPGPipeline::GetOBC::BlockingEvent,
     CommonPGPipeline::Process::BlockingEvent,
-    WaitSubop::BlockingEvent,
     PG::BackgroundProcessLock::Wait::BlockingEvent,
-    WaitTrimTimer::BlockingEvent,
     CompletionEvent
   > tracking_events;
 
@@ -153,12 +138,6 @@ private:
   remove_or_update_iertr::future<ceph::os::Transaction>
   remove_or_update(ObjectContextRef obc, ObjectContextRef head_obc);
 
-  // we don't need to synchronize with other instances started by
-  // SnapTrimEvent; it's here for the sake of op tracking.
-  struct WaitRepop : OrderedConcurrentPhaseT<WaitRepop> {
-    static constexpr auto type_name = "SnapTrimObjSubEvent::wait_repop";
-  } wait_repop;
-
   void add_log_entry(
     int _op,
     const hobject_t& _soid,
@@ -195,7 +174,6 @@ public:
     PGActivationBlocker::BlockingEvent,
     CommonPGPipeline::GetOBC::BlockingEvent,
     CommonPGPipeline::Process::BlockingEvent,
-    WaitRepop::BlockingEvent,
     CompletionEvent
   > tracking_events;
 };