From: Matan Breizman Date: Sun, 7 Jun 2026 12:11:49 +0000 (+0000) Subject: crimson/os/seastore: break down submit phases X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6f7f9779e787c1feef98090710cd0c162d41b2ef;p=ceph.git crimson/os/seastore: break down submit phases Signed-off-by: Matan Breizman --- diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 2464032ffae..3d88bb1c405 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -226,10 +226,16 @@ void SeaStore::Shard::register_metrics(store_index_t store_index) ); std::pair labels_by_stage[] = { - {txn_stage_t::COLLOCK_WAIT, sm::label_instance("stage", "collock_wait")}, - {txn_stage_t::THROTTLER_WAIT, sm::label_instance("stage", "throttler_wait")}, - {txn_stage_t::BUILD, sm::label_instance("stage", "build")}, - {txn_stage_t::SUBMIT, sm::label_instance("stage", "submit")}, + {txn_stage_t::COLLOCK_WAIT, sm::label_instance("stage", "collock_wait")}, + {txn_stage_t::THROTTLER_WAIT, sm::label_instance("stage", "throttler_wait")}, + {txn_stage_t::BUILD, sm::label_instance("stage", "build")}, + {txn_stage_t::SUBMIT_TOTAL, sm::label_instance("stage", "submit_total")}, + {txn_stage_t::SUBMIT_RESERVE, sm::label_instance("stage", "submit_reserve")}, + {txn_stage_t::SUBMIT_OOL_WRITE, sm::label_instance("stage", "submit_ool_write")}, + {txn_stage_t::SUBMIT_LBA_UPDATE, sm::label_instance("stage", "submit_lba_update")}, + {txn_stage_t::SUBMIT_PREPARE_ENTER, sm::label_instance("stage", "submit_prepare_enter")}, + {txn_stage_t::SUBMIT_PREPARE_RECORD, sm::label_instance("stage", "submit_prepare_record")}, + {txn_stage_t::SUBMIT_JOURNAL, sm::label_instance("stage", "submit_journal")}, }; for (auto& [stage, label] : labels_by_stage) { auto idx = static_cast(stage); @@ -1774,7 +1780,16 @@ seastar::future<> SeaStore::Shard::do_transaction_no_callbacks( add_stage_latency_sample(txn_stage_t::COLLOCK_WAIT, collock_wait); add_stage_latency_sample(txn_stage_t::THROTTLER_WAIT, throttler_wait); add_stage_latency_sample(txn_stage_t::BUILD, ctx.build_time); - add_stage_latency_sample(txn_stage_t::SUBMIT, ctx.submit_time); + add_stage_latency_sample(txn_stage_t::SUBMIT_TOTAL, ctx.submit_time); + { + auto& pd = ctx.transaction->get_phase_durations(); + add_stage_latency_sample(txn_stage_t::SUBMIT_RESERVE, pd.reserve); + add_stage_latency_sample(txn_stage_t::SUBMIT_OOL_WRITE, pd.ool_write); + add_stage_latency_sample(txn_stage_t::SUBMIT_LBA_UPDATE, pd.lba_update); + add_stage_latency_sample(txn_stage_t::SUBMIT_PREPARE_ENTER, pd.prepare_enter); + add_stage_latency_sample(txn_stage_t::SUBMIT_PREPARE_RECORD, pd.prepare_record); + add_stage_latency_sample(txn_stage_t::SUBMIT_JOURNAL, pd.journal); + } add_latency_sample( op_type_t::DO_TRANSACTION, std::chrono::steady_clock::now() - ctx.begin_timestamp); diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index 61c59b8d554..0cb63205052 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -51,7 +51,14 @@ enum class txn_stage_t : uint8_t { COLLOCK_WAIT = 0, // waiting on the collection ordering_lock THROTTLER_WAIT, // waiting for a throttler slot BUILD, // building the transaction (_do_transaction_step loop) - SUBMIT, // submit_transaction (pipeline + journal write) + SUBMIT_TOTAL, // the whole submit_transaction (pipeline + journal write) + // Sub-phases of submit_transaction: + SUBMIT_RESERVE, // enter(reserve_projected_usage) + epm reserve_projected_usage + SUBMIT_OOL_WRITE, // write_delayed + write_preallocated OOL extents (device I/O) + SUBMIT_LBA_UPDATE, // update_lba_mappings + SUBMIT_PREPARE_ENTER, // enter(prepare) pipeline stage (global OrderedExclusive wait) + SUBMIT_PREPARE_RECORD, // prepare_record (record encoding) + SUBMIT_JOURNAL, // journal->submit_record -- POST-lock (not part of the hold) MAX }; diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index 8fa2d011f5e..3c2be49db9a 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include @@ -470,6 +471,19 @@ public: return num_replays; } + // Time spent in each sub-phase of submit_transaction, accumulated across retries + struct phase_durations_t { + std::chrono::steady_clock::duration reserve{0}; // enter reserve + epm reserve + std::chrono::steady_clock::duration ool_write{0}; // delayed + preallocated OOL writes + std::chrono::steady_clock::duration lba_update{0}; // update_lba_mappings + std::chrono::steady_clock::duration prepare_enter{0}; // enter(prepare) pipeline stage + std::chrono::steady_clock::duration prepare_record{0}; // prepare_record + std::chrono::steady_clock::duration journal{0}; // journal->submit_record (post-lock) + }; + phase_durations_t &get_phase_durations() { + return phase_durations; + } + auto &get_handle() { return handle; } @@ -890,6 +904,8 @@ private: std::size_t num_replays = 0; + phase_durations_t phase_durations; + bool has_reset = false; OrderingHandle handle; diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index af5d32930ac..f0fd3509cea 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -471,6 +471,7 @@ TransactionManager::submit_transaction( { LOG_PREFIX(TransactionManager::submit_transaction); SUBDEBUGT(seastore_t, "start, entering reserve_projected_usage", t); + auto reserve_start = std::chrono::steady_clock::now(); co_await trans_intr::make_interruptible( t.get_handle().enter(write_pipeline.reserve_projected_usage) ); @@ -483,6 +484,8 @@ TransactionManager::submit_transaction( projected_usage ) ); + t.get_phase_durations().reserve += + std::chrono::steady_clock::now() - reserve_start; auto release_usage = seastar::defer([this, FNAME, projected_usage, &t] { SUBTRACET(seastore_t, "releasing projected_usage: {}", t, projected_usage); epm->release_projected_usage(projected_usage); @@ -593,21 +596,33 @@ TransactionManager::do_submit_transaction( ); SUBTRACET(seastore_t, "write delayed ool extents", tref); + auto ool_start = std::chrono::steady_clock::now(); co_await epm->write_delayed_ool_extents( tref, dispatch_result.alloc_map ); + tref.get_phase_durations().ool_write += + std::chrono::steady_clock::now() - ool_start; auto allocated_extents = tref.get_valid_pre_alloc_list(); + auto lba_start = std::chrono::steady_clock::now(); co_await update_lba_mappings(tref, allocated_extents); + tref.get_phase_durations().lba_update += + std::chrono::steady_clock::now() - lba_start; auto num_extents = allocated_extents.size(); SUBTRACET(seastore_t, "process {} allocated extents", tref, num_extents); + ool_start = std::chrono::steady_clock::now(); co_await epm->write_preallocated_ool_extents(tref, allocated_extents); + tref.get_phase_durations().ool_write += + std::chrono::steady_clock::now() - ool_start; SUBTRACET(seastore_t, "entering prepare", tref); + auto prepare_enter_start = std::chrono::steady_clock::now(); co_await trans_intr::make_interruptible( tref.get_handle().enter(write_pipeline.prepare) ); + tref.get_phase_durations().prepare_enter += + std::chrono::steady_clock::now() - prepare_enter_start; while (tref.need_wait_visibility) { co_await trans_intr::make_interruptible(seastar::yield()); @@ -617,10 +632,13 @@ TransactionManager::do_submit_transaction( cache->trim_backref_bufs(*trim_alloc_to); } + auto prepare_record_start = std::chrono::steady_clock::now(); auto record = cache->prepare_record( tref, journal->get_trimmer().get_journal_head(), journal->get_trimmer().get_dirty_tail()); + tref.get_phase_durations().prepare_record += + std::chrono::steady_clock::now() - prepare_record_start; tref.get_handle().maybe_release_collection_lock(); if (tref.get_src() == Transaction::src_t::MUTATE) { @@ -629,6 +647,7 @@ TransactionManager::do_submit_transaction( } SUBTRACET(seastore_t, "submitting record", tref); + auto journal_start = std::chrono::steady_clock::now(); co_await journal->submit_record( std::move(record), tref.get_handle(), @@ -648,6 +667,8 @@ TransactionManager::do_submit_transaction( submit_transaction_iertr::pass_further{}, crimson::ct_error::assert_all("Hit error submitting to journal") ); + tref.get_phase_durations().journal += + std::chrono::steady_clock::now() - journal_start; co_await trans_intr::make_interruptible( tref.get_handle().complete()