git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Generalise can_serve_replica_read for consumption by EC.
author Alex Ainscow <aainscow@uk.ibm.com>
Fri, 3 Oct 2025 13:15:32 +0000 (14:15 +0100)
committer Alex Ainscow <aainscow@uk.ibm.com>
Wed, 26 Nov 2025 11:28:25 +0000 (11:28 +0000)
The can_serve_replica_read() function is called by a replica to determine whether there are
any uncommitted writes to an object.  If such writes exist, the replica rejects the IO to avoid
the risk of reading data from a write which may yet be rolled back.

The same check is going to be useful for erasure-coded (EC) direct reads, so the function is renamed to can_serve_read().

The added std::string_view, used only to choose between "replica" and "shard" in the debug log messages, is not expensive.

Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
src/crimson/osd/osd_operations/client_request.cc
src/messages/MOSDRepOp.h
src/osd/PeeringState.cc
src/osd/PeeringState.h
src/osd/PrimaryLogPG.cc

index 38c1b890123ad869f2fa42846ffe2e0b4c9312de..1073b55d7a20bebe31a0dc84280072b1c004462d 100644 (file)
@@ -209,8 +209,8 @@ ClientRequest::interruptible_future<> ClientRequest::with_pg_process_interruptib
       pg.get_perf_logger().inc(l_osd_replica_read_redirect_missing);
       co_await reply_op_error(pgref, -EAGAIN);
       co_return;
-    } else if (!pg.get_peering_state().can_serve_replica_read(m->get_hobj())) {
-      // Note: can_serve_replica_read checks for writes on the head object
+    } else if (!pg.get_peering_state().can_serve_read(m->get_hobj())) {
+      // Note: can_serve_read checks for writes on the head object
       //       as writes can only occur to head.
       DEBUGDPP("{}.{}: unstable write on replica, bouncing to primary",
               pg, *this, this_instance_id);
index fb6247bca53701a3930de4504baecf5801dd517f..5d001998b2a4896bf37215c6bbc9d5e0e9be23d5 100644 (file)
@@ -65,7 +65,7 @@ public:
    * Because updates <= pg_committed_to cannot become divergent, replicas
    * may safely serve reads on objects which do not have more recent updates.
    *
-   * See PeeringState::pg_committed_to, PeeringState::can_serve_replica_read
+   * See PeeringState::pg_committed_to, PeeringState::can_serve_read
    *
    * Historical note: Prior to early 2024, this field was named
    * min_last_complete_ondisk.  The replica, however, only actually relied on
index 7a122925da83ce49a1f840721e31833d4dfb8390..53041078adc9324a09f321d5d99928c36f6aba27 100644 (file)
@@ -1546,17 +1546,21 @@ bool PeeringState::needs_backfill() const
 }
 
 /**
-* Returns whether a particular object can be safely read on this replica
+* Returns whether a particular object can be safely read
 */
-bool PeeringState::can_serve_replica_read(const hobject_t &hoid)
+bool PeeringState::can_serve_read(const hobject_t &hoid)
 {
   ceph_assert(!is_primary());
+  std::string_view storage_object = "replica";
+  if (pool.info.is_erasure()) {
+    storage_object = "shard";
+  }
   if (!pg_log.get_log().has_write_since(
       hoid, pg_committed_to)) {
-    psdout(20) << "can be safely read on this replica" << dendl;
+    psdout(20) << "can be safely read on this " << storage_object << dendl;
     return true;
   } else {
-    psdout(20) << "can't read object on this replica" << dendl;
+    psdout(20) << "can't read object on this " << storage_object << dendl;
     return false;
   }
 }
index 9b442a58c54bdf93944d6cbf6b141a8881052580..9e386188ba46d6112799da98e64418d7610e58c8 100644 (file)
@@ -2439,7 +2439,7 @@ public:
   bool needs_recovery() const;
   bool needs_backfill() const;
 
-  bool can_serve_replica_read(const hobject_t &hoid);
+  bool can_serve_read(const hobject_t &hoid);
 
   /**
    * Returns whether the current acting set is able to go active
index d5afb804d7e97713adbaae6f943d46c65265c0c0..37c5d670cf648b6e0b66fb4ac91e7128ad6be158 100644 (file)
@@ -2352,15 +2352,20 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
   }
 
   if (!is_primary()) {
-    if (!recovery_state.can_serve_replica_read(oid)) {
+    if (!recovery_state.can_serve_read(oid)) {
+      std::string_view storage_object = "replica";
+      if (pool.info.is_erasure()) {
+        storage_object = "shard";
+      }
       dout(20) << __func__
-               << ": unstable write on replica, bouncing to primary "
+               << ": unstable write on " << storage_object
+               << ", bouncing to primary "
               << *m << dendl;
       osd->logger->inc(l_osd_replica_read_redirect_conflict);
       osd->reply_op_error(op, -EAGAIN);
       return;
     }
-    dout(20) << __func__ << ": serving replica read on oid " << oid
+    dout(20) << __func__ << ": serving read on oid " << oid
              << dendl;
     osd->logger->inc(l_osd_replica_read_served);
   }