]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: break down submit phases
authorMatan Breizman <mbreizma@redhat.com>
Sun, 7 Jun 2026 12:11:49 +0000 (12:11 +0000)
committerMatan Breizman <mbreizma@redhat.com>
Sun, 7 Jun 2026 12:22:02 +0000 (12:22 +0000)
Signed-off-by: Matan Breizman <mbreizma@redhat.com>
src/crimson/os/seastore/seastore.cc
src/crimson/os/seastore/seastore.h
src/crimson/os/seastore/transaction.h
src/crimson/os/seastore/transaction_manager.cc

index 2464032ffae85f55753a8347461c723195ae24a2..3d88bb1c405a4faab4c643e5ff372d07fcdd7021 100644 (file)
@@ -226,10 +226,16 @@ void SeaStore::Shard::register_metrics(store_index_t store_index)
   );
 
   std::pair<txn_stage_t, sm::label_instance> labels_by_stage[] = {
-    {txn_stage_t::COLLOCK_WAIT,   sm::label_instance("stage", "collock_wait")},
-    {txn_stage_t::THROTTLER_WAIT, sm::label_instance("stage", "throttler_wait")},
-    {txn_stage_t::BUILD,          sm::label_instance("stage", "build")},
-    {txn_stage_t::SUBMIT,         sm::label_instance("stage", "submit")},
+    {txn_stage_t::COLLOCK_WAIT,          sm::label_instance("stage", "collock_wait")},
+    {txn_stage_t::THROTTLER_WAIT,        sm::label_instance("stage", "throttler_wait")},
+    {txn_stage_t::BUILD,                 sm::label_instance("stage", "build")},
+    {txn_stage_t::SUBMIT_TOTAL,          sm::label_instance("stage", "submit_total")},
+    {txn_stage_t::SUBMIT_RESERVE,        sm::label_instance("stage", "submit_reserve")},
+    {txn_stage_t::SUBMIT_OOL_WRITE,      sm::label_instance("stage", "submit_ool_write")},
+    {txn_stage_t::SUBMIT_LBA_UPDATE,     sm::label_instance("stage", "submit_lba_update")},
+    {txn_stage_t::SUBMIT_PREPARE_ENTER,  sm::label_instance("stage", "submit_prepare_enter")},
+    {txn_stage_t::SUBMIT_PREPARE_RECORD, sm::label_instance("stage", "submit_prepare_record")},
+    {txn_stage_t::SUBMIT_JOURNAL,        sm::label_instance("stage", "submit_journal")},
   };
   for (auto& [stage, label] : labels_by_stage) {
     auto idx = static_cast<std::size_t>(stage);
@@ -1774,7 +1780,16 @@ seastar::future<> SeaStore::Shard::do_transaction_no_callbacks(
   add_stage_latency_sample(txn_stage_t::COLLOCK_WAIT, collock_wait);
   add_stage_latency_sample(txn_stage_t::THROTTLER_WAIT, throttler_wait);
   add_stage_latency_sample(txn_stage_t::BUILD, ctx.build_time);
-  add_stage_latency_sample(txn_stage_t::SUBMIT, ctx.submit_time);
+  add_stage_latency_sample(txn_stage_t::SUBMIT_TOTAL, ctx.submit_time);
+  {
+    auto& pd = ctx.transaction->get_phase_durations();
+    add_stage_latency_sample(txn_stage_t::SUBMIT_RESERVE, pd.reserve);
+    add_stage_latency_sample(txn_stage_t::SUBMIT_OOL_WRITE, pd.ool_write);
+    add_stage_latency_sample(txn_stage_t::SUBMIT_LBA_UPDATE, pd.lba_update);
+    add_stage_latency_sample(txn_stage_t::SUBMIT_PREPARE_ENTER, pd.prepare_enter);
+    add_stage_latency_sample(txn_stage_t::SUBMIT_PREPARE_RECORD, pd.prepare_record);
+    add_stage_latency_sample(txn_stage_t::SUBMIT_JOURNAL, pd.journal);
+  }
   add_latency_sample(
     op_type_t::DO_TRANSACTION,
     std::chrono::steady_clock::now() - ctx.begin_timestamp);
index 61c59b8d554eb135553f505397d69fa5657a2a04..0cb63205052d2ea0d0df6dec0d8b52d4954b184f 100644 (file)
@@ -51,7 +51,14 @@ enum class txn_stage_t : uint8_t {
     COLLOCK_WAIT = 0,  // waiting on the collection ordering_lock
     THROTTLER_WAIT,    // waiting for a throttler slot
     BUILD,             // building the transaction (_do_transaction_step loop)
-    SUBMIT,            // submit_transaction (pipeline + journal write)
+    SUBMIT_TOTAL,      // the whole submit_transaction (pipeline + journal write)
+    // Sub-phases of submit_transaction:
+    SUBMIT_RESERVE,        // enter(reserve_projected_usage) + epm reserve_projected_usage
+    SUBMIT_OOL_WRITE,      // write_delayed + write_preallocated OOL extents (device I/O)
+    SUBMIT_LBA_UPDATE,     // update_lba_mappings
+    SUBMIT_PREPARE_ENTER,  // enter(prepare) pipeline stage (global OrderedExclusive wait)
+    SUBMIT_PREPARE_RECORD, // prepare_record (record encoding)
+    SUBMIT_JOURNAL,        // journal->submit_record -- POST-lock (not part of the hold)
     MAX
 };
 
index 8fa2d011f5e12dcf055b00c85742d09f2a85b01c..3c2be49db9ac09bcec7eb31efe9263a9815ad531 100644 (file)
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <chrono>
 #include <iostream>
 
 #include <boost/intrusive/list.hpp>
@@ -470,6 +471,19 @@ public:
     return num_replays;
   }
 
+  // Time spent in each sub-phase of submit_transaction, accumulated across retries
+  struct phase_durations_t {
+    std::chrono::steady_clock::duration reserve{0};         // enter reserve + epm reserve
+    std::chrono::steady_clock::duration ool_write{0};       // delayed + preallocated OOL writes
+    std::chrono::steady_clock::duration lba_update{0};      // update_lba_mappings
+    std::chrono::steady_clock::duration prepare_enter{0};   // enter(prepare) pipeline stage
+    std::chrono::steady_clock::duration prepare_record{0};  // prepare_record
+    std::chrono::steady_clock::duration journal{0};         // journal->submit_record (post-lock)
+  };
+  phase_durations_t &get_phase_durations() {
+    return phase_durations;
+  }
+
   auto &get_handle() {
     return handle;
   }
@@ -890,6 +904,8 @@ private:
 
   std::size_t num_replays = 0;
 
+  phase_durations_t phase_durations;
+
   bool has_reset = false;
 
   OrderingHandle handle;
index af5d32930ac5e4ac6414a82d068d4653dbeacf36..f0fd3509cea5e2479b1ff22fbe7b1e2c2620b9bb 100644 (file)
@@ -471,6 +471,7 @@ TransactionManager::submit_transaction(
 {
   LOG_PREFIX(TransactionManager::submit_transaction);
   SUBDEBUGT(seastore_t, "start, entering reserve_projected_usage", t);
+  auto reserve_start = std::chrono::steady_clock::now();
   co_await trans_intr::make_interruptible(
     t.get_handle().enter(write_pipeline.reserve_projected_usage)
   );
@@ -483,6 +484,8 @@ TransactionManager::submit_transaction(
       projected_usage
     )
   );
+  t.get_phase_durations().reserve +=
+    std::chrono::steady_clock::now() - reserve_start;
   auto release_usage = seastar::defer([this, FNAME, projected_usage, &t] {
     SUBTRACET(seastore_t, "releasing projected_usage: {}", t, projected_usage);
     epm->release_projected_usage(projected_usage);
@@ -593,21 +596,33 @@ TransactionManager::do_submit_transaction(
   );
 
   SUBTRACET(seastore_t, "write delayed ool extents", tref);
+  auto ool_start = std::chrono::steady_clock::now();
   co_await epm->write_delayed_ool_extents(
     tref, dispatch_result.alloc_map
   );
+  tref.get_phase_durations().ool_write +=
+    std::chrono::steady_clock::now() - ool_start;
 
   auto allocated_extents = tref.get_valid_pre_alloc_list();
+  auto lba_start = std::chrono::steady_clock::now();
   co_await update_lba_mappings(tref, allocated_extents);
+  tref.get_phase_durations().lba_update +=
+    std::chrono::steady_clock::now() - lba_start;
 
   auto num_extents = allocated_extents.size();
   SUBTRACET(seastore_t, "process {} allocated extents", tref, num_extents);
+  ool_start = std::chrono::steady_clock::now();
   co_await epm->write_preallocated_ool_extents(tref, allocated_extents);
+  tref.get_phase_durations().ool_write +=
+    std::chrono::steady_clock::now() - ool_start;
 
   SUBTRACET(seastore_t, "entering prepare", tref);
+  auto prepare_enter_start = std::chrono::steady_clock::now();
   co_await trans_intr::make_interruptible(
     tref.get_handle().enter(write_pipeline.prepare)
   );
+  tref.get_phase_durations().prepare_enter +=
+    std::chrono::steady_clock::now() - prepare_enter_start;
 
   while (tref.need_wait_visibility) {
     co_await trans_intr::make_interruptible(seastar::yield());
@@ -617,10 +632,13 @@ TransactionManager::do_submit_transaction(
     cache->trim_backref_bufs(*trim_alloc_to);
   }
 
+  auto prepare_record_start = std::chrono::steady_clock::now();
   auto record = cache->prepare_record(
     tref,
     journal->get_trimmer().get_journal_head(),
     journal->get_trimmer().get_dirty_tail());
+  tref.get_phase_durations().prepare_record +=
+    std::chrono::steady_clock::now() - prepare_record_start;
 
   tref.get_handle().maybe_release_collection_lock();
   if (tref.get_src() == Transaction::src_t::MUTATE) {
@@ -629,6 +647,7 @@ TransactionManager::do_submit_transaction(
   }
 
   SUBTRACET(seastore_t, "submitting record", tref);
+  auto journal_start = std::chrono::steady_clock::now();
   co_await journal->submit_record(
     std::move(record),
     tref.get_handle(),
@@ -648,6 +667,8 @@ TransactionManager::do_submit_transaction(
       submit_transaction_iertr::pass_further{},
       crimson::ct_error::assert_all("Hit error submitting to journal")
     );
+  tref.get_phase_durations().journal +=
+    std::chrono::steady_clock::now() - journal_start;
 
   co_await trans_intr::make_interruptible(
     tref.get_handle().complete()