From 480b7c6e19d9061fa279a0255eaab3a532101f42 Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Wed, 2 Apr 2025 09:30:45 +0000 Subject: [PATCH] crimson/osd/../client_request: add note to can_serve_replica_read can_serve_replica_read uses PGLog::has_write_since; when checking for writes, we actually check whether any pglog entry belongs to the head object. The only two pglog entries that belong to a clone object are: 1) At creation (pg_log_entry_t::CLONE) 2) At trimming (see remove_or_update) In both cases, there would also be another pglog entry for the head. --- Add assertion in prepare_head_update to assert that the above is true. The obc passed to prepare_head_update (by OpsExecuter) could also be a clone object (after being resolved). However, write operations should only occur on the head, so let's verify that. Signed-off-by: Matan Breizman --- src/crimson/osd/ops_executer.cc | 1 + src/crimson/osd/osd_operations/client_request.cc | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index 991c16d7aaea8..e289a87733ae4 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -903,6 +903,7 @@ pg_log_entry_t OpsExecuter::prepare_head_update( { LOG_PREFIX(OpsExecuter::prepare_head_update); assert(obc->obs.oi.soid.snap >= CEPH_MAXSNAP); + assert(obc->obs.oi.soid.is_head()); update_clone_overlap(); if (cloning_ctx) { diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index f7a6b81d374ce..e97525f9ad13b 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -205,6 +205,8 @@ ClientRequest::interruptible_future<> ClientRequest::with_pg_process_interruptib co_await reply_op_error(pgref, -EAGAIN); co_return; } else if (!pg.get_peering_state().can_serve_replica_read(m->get_hobj())) { + // Note: can_serve_replica_read checks for writes on the head object + // as writes 
can only occur to head. DEBUGDPP("{}.{}: unstable write on replica, bouncing to primary", pg, *this, this_instance_id); pg.get_perf_logger().inc(l_osd_replica_read_redirect_conflict); -- 2.39.5