git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: Generalise can_serve_replica_read for consumption by EC.
author: Alex Ainscow <aainscow@uk.ibm.com>
Fri, 3 Oct 2025 13:15:32 +0000 (14:15 +0100)
committer: Alex Ainscow <aainscow@uk.ibm.com>
Tue, 14 Oct 2025 10:42:14 +0000 (11:42 +0100)
The can_serve_replica_read() function is called by a replica to determine whether the object
has any writes more recent than pg_committed_to.  If such writes exist, the replica rejects the
IO with -EAGAIN to avoid the risk of reading data from a write which may yet be rolled back.

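The same check is going to be useful for EC direct reads, so rename the function to
can_serve_read() and make its debug messages say "shard" instead of "replica" on erasure-coded pools.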

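The std::string_view used to pick the "replica"/"shard" wording is not expensive: it only
refers to a string literal, so no allocation or copy is made.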

Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
src/crimson/osd/osd_operations/client_request.cc
src/messages/MOSDRepOp.h
src/osd/PeeringState.cc
src/osd/PeeringState.h
src/osd/PrimaryLogPG.cc

index 38c1b890123ad869f2fa42846ffe2e0b4c9312de..1073b55d7a20bebe31a0dc84280072b1c004462d 100644 (file)
@@ -209,8 +209,8 @@ ClientRequest::interruptible_future<> ClientRequest::with_pg_process_interruptib
       pg.get_perf_logger().inc(l_osd_replica_read_redirect_missing);
       co_await reply_op_error(pgref, -EAGAIN);
       co_return;
-    } else if (!pg.get_peering_state().can_serve_replica_read(m->get_hobj())) {
-      // Note: can_serve_replica_read checks for writes on the head object
+    } else if (!pg.get_peering_state().can_serve_read(m->get_hobj())) {
+      // Note: can_serve_read checks for writes on the head object
       //       as writes can only occur to head.
       DEBUGDPP("{}.{}: unstable write on replica, bouncing to primary",
               pg, *this, this_instance_id);
index fb6247bca53701a3930de4504baecf5801dd517f..5d001998b2a4896bf37215c6bbc9d5e0e9be23d5 100644 (file)
@@ -65,7 +65,7 @@ public:
    * Because updates <= pg_committed_to cannot become divergent, replicas
    * may safely serve reads on objects which do not have more recent updates.
    *
-   * See PeeringState::pg_committed_to, PeeringState::can_serve_replica_read
+   * See PeeringState::pg_committed_to, PeeringState::can_serve_read
    *
    * Historical note: Prior to early 2024, this field was named
    * min_last_complete_ondisk.  The replica, however, only actually relied on
index 25d53a010e74cde05a8a5c5a4df38c630ce4b37b..698b44ed5fa38cedc193fa8d925e4e0d16bc356b 100644 (file)
@@ -1549,17 +1549,21 @@ bool PeeringState::needs_backfill() const
 }
 
 /**
-* Returns whether a particular object can be safely read on this replica
+* Returns whether a particular object can be safely read
 */
-bool PeeringState::can_serve_replica_read(const hobject_t &hoid)
+bool PeeringState::can_serve_read(const hobject_t &hoid)
 {
   ceph_assert(!is_primary());
+  std::string_view storage_object = "replica";
+  if (pool.info.is_erasure()) {
+    storage_object = "shard";
+  }
   if (!pg_log.get_log().has_write_since(
       hoid, pg_committed_to)) {
-    psdout(20) << "can be safely read on this replica" << dendl;
+    psdout(20) << "can be safely read on this " << storage_object << dendl;
     return true;
   } else {
-    psdout(20) << "can't read object on this replica" << dendl;
+    psdout(20) << "can't read object on this " << storage_object << dendl;
     return false;
   }
 }
index a0c55750ac11b5aef35b5f7dfada10b4c1d12d69..226ba4281dca90a15f2fac8d4b30e44aad479170 100644 (file)
@@ -2441,7 +2441,7 @@ public:
   bool needs_recovery() const;
   bool needs_backfill() const;
 
-  bool can_serve_replica_read(const hobject_t &hoid);
+  bool can_serve_read(const hobject_t &hoid);
 
   /**
    * Returns whether the current acting set is able to go active
index f5e7273ed3ab5cdaff09a3a8ea24b0ea5e26c0ec..b808ce95ebd60ad589dcdb7e811ee4ad8ff1280b 100644 (file)
@@ -2352,15 +2352,20 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
   }
 
   if (!is_primary()) {
-    if (!recovery_state.can_serve_replica_read(oid)) {
+    if (!recovery_state.can_serve_read(oid)) {
+      std::string_view storage_object = "replica";
+      if (pool.info.is_erasure()) {
+        storage_object = "shard";
+      }
       dout(20) << __func__
-               << ": unstable write on replica, bouncing to primary "
+               << ": unstable write on " << storage_object
+               << ", bouncing to primary "
               << *m << dendl;
       osd->logger->inc(l_osd_replica_read_redirect_conflict);
       osd->reply_op_error(op, -EAGAIN);
       return;
     }
-    dout(20) << __func__ << ": serving replica read on oid " << oid
+    dout(20) << __func__ << ": serving read on oid " << oid
              << dendl;
     osd->logger->inc(l_osd_replica_read_served);
   }