From c3edc9ce0d9ec17525c1b22df528179f7462b3b7 Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Fri, 3 Oct 2025 14:15:32 +0100 Subject: [PATCH] osd: Generalise can_serve_replica_read for consumption by EC. The can_serve_replica_read() function is called by replica to determine whether there are any uncommitted writes. If such writes exist, then the system will reject the IO to avoid the risk of reading data from a write which may yet be rolled back. The same code is going to be useful for EC direct reads. The string_view code is not expensive. Signed-off-by: Alex Ainscow --- src/crimson/osd/osd_operations/client_request.cc | 4 ++-- src/messages/MOSDRepOp.h | 2 +- src/osd/PeeringState.cc | 12 ++++++++---- src/osd/PeeringState.h | 2 +- src/osd/PrimaryLogPG.cc | 11 ++++++++--- 5 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index 38c1b890123..1073b55d7a2 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -209,8 +209,8 @@ ClientRequest::interruptible_future<> ClientRequest::with_pg_process_interruptib pg.get_perf_logger().inc(l_osd_replica_read_redirect_missing); co_await reply_op_error(pgref, -EAGAIN); co_return; - } else if (!pg.get_peering_state().can_serve_replica_read(m->get_hobj())) { - // Note: can_serve_replica_read checks for writes on the head object + } else if (!pg.get_peering_state().can_serve_read(m->get_hobj())) { + // Note: can_serve_read checks for writes on the head object // as writes can only occur to head. DEBUGDPP("{}.{}: unstable write on replica, bouncing to primary", pg, *this, this_instance_id); diff --git a/src/messages/MOSDRepOp.h b/src/messages/MOSDRepOp.h index fb6247bca53..5d001998b2a 100644 --- a/src/messages/MOSDRepOp.h +++ b/src/messages/MOSDRepOp.h @@ -65,7 +65,7 @@ public: * Because updates <= pg_committed_to cannot become divergent, replicas * may safely serve reads on objects which do not have more recent updates. * - * See PeeringState::pg_committed_to, PeeringState::can_serve_replica_read + * See PeeringState::pg_committed_to, PeeringState::can_serve_read * * Historical note: Prior to early 2024, this field was named * min_last_complete_ondisk. The replica, however, only actually relied on diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 25d53a010e7..698b44ed5fa 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -1549,17 +1549,21 @@ bool PeeringState::needs_backfill() const } /** -* Returns whether a particular object can be safely read on this replica +* Returns whether a particular object can be safely read */ -bool PeeringState::can_serve_replica_read(const hobject_t &hoid) +bool PeeringState::can_serve_read(const hobject_t &hoid) { ceph_assert(!is_primary()); + std::string_view storage_object = "replica"; + if (pool.info.is_erasure()) { + storage_object = "shard"; + } if (!pg_log.get_log().has_write_since( hoid, pg_committed_to)) { - psdout(20) << "can be safely read on this replica" << dendl; + psdout(20) << "can be safely read on this " << storage_object << dendl; return true; } else { - psdout(20) << "can't read object on this replica" << dendl; + psdout(20) << "can't read object on this " << storage_object << dendl; return false; } } diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index a0c55750ac1..226ba4281dc 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -2441,7 +2441,7 @@ public: bool needs_recovery() const; bool needs_backfill() const; - bool can_serve_replica_read(const hobject_t &hoid); + bool can_serve_read(const hobject_t &hoid); /** * Returns whether the current acting set is able to go active diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index f5e7273ed3a..b808ce95ebd 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2352,15 +2352,20 @@ void PrimaryLogPG::do_op(OpRequestRef& op) } if (!is_primary()) { - if (!recovery_state.can_serve_replica_read(oid)) { + if (!recovery_state.can_serve_read(oid)) { + std::string_view storage_object = "replica"; + if (pool.info.is_erasure()) { + storage_object = "shard"; + } dout(20) << __func__ - << ": unstable write on replica, bouncing to primary " + << ": unstable write on " << storage_object + << ", bouncing to primary " << *m << dendl; osd->logger->inc(l_osd_replica_read_redirect_conflict); osd->reply_op_error(op, -EAGAIN); return; } - dout(20) << __func__ << ": serving replica read on oid " << oid + dout(20) << __func__ << ": serving read on oid " << oid << dendl; osd->logger->inc(l_osd_replica_read_served); } -- 2.39.5