From: Alex Ainscow Date: Fri, 3 Oct 2025 13:15:32 +0000 (+0100) Subject: osd: Generalise can_serve_replica_read for consumption by EC. X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ce0b1ed93d1345d372d1b742f86e4cf683a9329e;p=ceph.git osd: Generalise can_serve_replica_read for consumption by EC. The can_serve_replica_read() function is called by replica to determine whether there are any uncommitted writes. If such writes exist, then the system will reject the IO to avoid the risk of reading data from a write which may yet be rolled back. The same code is going to be useful for EC direct reads. The string_view code is not expensive. Signed-off-by: Alex Ainscow --- diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index 38c1b890123a..1073b55d7a20 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -209,8 +209,8 @@ ClientRequest::interruptible_future<> ClientRequest::with_pg_process_interruptib pg.get_perf_logger().inc(l_osd_replica_read_redirect_missing); co_await reply_op_error(pgref, -EAGAIN); co_return; - } else if (!pg.get_peering_state().can_serve_replica_read(m->get_hobj())) { - // Note: can_serve_replica_read checks for writes on the head object + } else if (!pg.get_peering_state().can_serve_read(m->get_hobj())) { + // Note: can_serve_read checks for writes on the head object // as writes can only occur to head. DEBUGDPP("{}.{}: unstable write on replica, bouncing to primary", pg, *this, this_instance_id); diff --git a/src/messages/MOSDRepOp.h b/src/messages/MOSDRepOp.h index fb6247bca537..5d001998b2a4 100644 --- a/src/messages/MOSDRepOp.h +++ b/src/messages/MOSDRepOp.h @@ -65,7 +65,7 @@ public: * Because updates <= pg_committed_to cannot become divergent, replicas * may safely serve reads on objects which do not have more recent updates. * - * See PeeringState::pg_committed_to, PeeringState::can_serve_replica_read + * See PeeringState::pg_committed_to, PeeringState::can_serve_read * * Historical note: Prior to early 2024, this field was named * min_last_complete_ondisk. The replica, however, only actually relied on diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 7a122925da83..53041078adc9 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -1546,17 +1546,21 @@ bool PeeringState::needs_backfill() const } /** -* Returns whether a particular object can be safely read on this replica +* Returns whether a particular object can be safely read */ -bool PeeringState::can_serve_replica_read(const hobject_t &hoid) +bool PeeringState::can_serve_read(const hobject_t &hoid) { ceph_assert(!is_primary()); + std::string_view storage_object = "replica"; + if (pool.info.is_erasure()) { + storage_object = "shard"; + } if (!pg_log.get_log().has_write_since( hoid, pg_committed_to)) { - psdout(20) << "can be safely read on this replica" << dendl; + psdout(20) << "can be safely read on this " << storage_object << dendl; return true; } else { - psdout(20) << "can't read object on this replica" << dendl; + psdout(20) << "can't read object on this " << storage_object << dendl; return false; } } diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 9b442a58c54b..9e386188ba46 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -2439,7 +2439,7 @@ public: bool needs_recovery() const; bool needs_backfill() const; - bool can_serve_replica_read(const hobject_t &hoid); + bool can_serve_read(const hobject_t &hoid); /** * Returns whether the current acting set is able to go active diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index d5afb804d7e9..37c5d670cf64 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2352,15 +2352,20 @@ void PrimaryLogPG::do_op(OpRequestRef& op) } if (!is_primary()) { - if (!recovery_state.can_serve_replica_read(oid)) { + if (!recovery_state.can_serve_read(oid)) { + std::string_view storage_object = "replica"; + if (pool.info.is_erasure()) { + storage_object = "shard"; + } dout(20) << __func__ - << ": unstable write on replica, bouncing to primary " + << ": unstable write on " << storage_object + << ", bouncing to primary " << *m << dendl; osd->logger->inc(l_osd_replica_read_redirect_conflict); osd->reply_op_error(op, -EAGAIN); return; } - dout(20) << __func__ << ": serving replica read on oid " << oid + dout(20) << __func__ << ": serving read on oid " << oid << dendl; osd->logger->inc(l_osd_replica_read_served); }