From 91f8c0228a9169a0d26d7a81fd3f7c0dd0162c29 Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Fri, 3 Oct 2025 14:00:10 +0100 Subject: [PATCH] osd: Create EC Direct Read flag and pass through to EC. This is in preparation for supporting sparse and sync reads in EC. Such ops will only be supported for "balance reads". Signed-off-by: Alex Ainscow --- src/crimson/osd/pg.cc | 3 +-- src/include/rados.h | 4 ++++ src/osd/OpRequest.h | 2 ++ src/osd/PG.cc | 3 +-- src/osd/PrimaryLogPG.cc | 14 ++++++++++---- src/osd/osd_op_util.cc | 4 ++++ src/osd/osd_op_util.h | 2 ++ src/osd/osd_types.cc | 1 + src/osd/osd_types.h | 1 + 9 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 5154749bcaa..09ad5f1946d 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -1604,8 +1604,7 @@ bool PG::can_discard_op(const MOSDOp& m) const { return true; } - if ((m.get_flags() & (CEPH_OSD_FLAG_BALANCE_READS | - CEPH_OSD_FLAG_LOCALIZE_READS)) + if ((m.get_flags() & CEPH_OSD_FLAG_DIRECT_READ) && !is_primary() && (m.get_map_epoch() < peering_state.get_info().history.same_interval_since)) diff --git a/src/include/rados.h b/src/include/rados.h index e4c58faf59c..7680fbdcb1d 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -481,8 +481,12 @@ enum { CEPH_OSD_FLAG_IGNORE_REDIRECT = 0x2000000, /* ignore redirection */ CEPH_OSD_FLAG_RETURNVEC = 0x4000000, /* allow overall result >= 0, and return >= 0 and buffer for each op in opvec */ CEPH_OSD_FLAG_SUPPORTSPOOLEIO = 0x8000000, /* client understands pool EIO flag */ + CEPH_OSD_FLAG_EC_DIRECT_READ = 0x10000000, /* Erasure code doing a partial read direct to OSD. */ }; +// Indicates an IO which is direct-to-OSD and may not be on the primary. 
+#define CEPH_OSD_FLAG_DIRECT_READ (CEPH_OSD_FLAG_BALANCE_READS | CEPH_OSD_FLAG_LOCALIZE_READS | CEPH_OSD_FLAG_EC_DIRECT_READ) + enum { CEPH_OSD_OP_FLAG_EXCL = 0x1, /* EXCL object create */ CEPH_OSD_OP_FLAG_FAILOK = 0x2, /* continue despite failure */ diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h index 2766a7aded5..255aa59d530 100644 --- a/src/osd/OpRequest.h +++ b/src/osd/OpRequest.h @@ -48,6 +48,8 @@ public: bool need_skip_handle_cache() const { return op_info.need_skip_handle_cache(); } bool need_skip_promote() const { return op_info.need_skip_promote(); } bool allows_returnvec() const { return op_info.allows_returnvec(); } + bool ec_direct_read() const { return op_info.ec_direct_read(); } + void set_ec_direct_read() { return op_info.set_ec_direct_read(); } std::vector classes() const { return op_info.get_classes(); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index fe1c51e7595..1c57e4cddd8 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1938,8 +1938,7 @@ bool PG::can_discard_op(OpRequestRef& op) return true; } - if ((m->get_flags() & (CEPH_OSD_FLAG_BALANCE_READS | - CEPH_OSD_FLAG_LOCALIZE_READS)) && + if ((m->get_flags() & (CEPH_OSD_FLAG_DIRECT_READ)) && !is_primary() && m->get_map_epoch() < info.history.same_interval_since) { // Note: the Objecter will resend on interval change without the primary diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 4f28cbbc6e1..f5e7273ed3a 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2047,16 +2047,22 @@ void PrimaryLogPG::do_op(OpRequestRef& op) } // check for op with rwordered and rebalance or localize reads - if ((m->has_flag(CEPH_OSD_FLAG_BALANCE_READS) || m->has_flag(CEPH_OSD_FLAG_LOCALIZE_READS)) && - op->rwordered()) { + if (m->has_flag(CEPH_OSD_FLAG_DIRECT_READ) && op->rwordered()) { dout(4) << __func__ << ": rebelance or localized reads with rwordered not allowed " << *m << dendl; osd->reply_op_error(op, -EINVAL); return; } - if ((m->get_flags() & 
(CEPH_OSD_FLAG_BALANCE_READS | - CEPH_OSD_FLAG_LOCALIZE_READS)) && + if (m->get_flags() & CEPH_OSD_FLAG_EC_DIRECT_READ) { + if (is_primary() || is_nonprimary()) { + op->set_ec_direct_read(); + } else { + osd->handle_misdirected_op(this, op); + return; + } + } else if ((m->get_flags() & (CEPH_OSD_FLAG_BALANCE_READS | + CEPH_OSD_FLAG_LOCALIZE_READS)) && op->may_read() && !(op->may_write() || op->may_cache())) { // balanced reads; any replica will do diff --git a/src/osd/osd_op_util.cc b/src/osd/osd_op_util.cc index b0fbe8f370a..2c2ad8e3ec9 100644 --- a/src/osd/osd_op_util.cc +++ b/src/osd/osd_op_util.cc @@ -52,6 +52,9 @@ bool OpInfo::need_skip_promote() const { bool OpInfo::allows_returnvec() const { return check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC); } +bool OpInfo::ec_direct_read() const { + return check_rmw(CEPH_OSD_RMW_FLAG_EC_DIRECT_READ); +} /** * may_read_data() * @@ -79,6 +82,7 @@ void OpInfo::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); void OpInfo::set_force_rwordered() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED); } void OpInfo::set_returnvec() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC); } void OpInfo::set_read_data() { set_rmw_flags(CEPH_OSD_RMW_FLAG_READ_DATA); } +void OpInfo::set_ec_direct_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_EC_DIRECT_READ); } int OpInfo::set_from_op( diff --git a/src/osd/osd_op_util.h b/src/osd/osd_op_util.h index 118934206d8..ba1acae4c9e 100644 --- a/src/osd/osd_op_util.h +++ b/src/osd/osd_op_util.h @@ -59,6 +59,7 @@ public: bool need_skip_handle_cache() const; bool need_skip_promote() const; bool allows_returnvec() const; + bool ec_direct_read() const; void set_read(); void set_write(); @@ -72,6 +73,7 @@ public: void set_force_rwordered(); void set_returnvec(); void set_read_data(); + void set_ec_direct_read(); int set_from_op( const MOSDOp *m, diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 3e51bed17be..d3b39355d87 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -103,6 +103,7 
@@ const char *ceph_osd_flag_name(unsigned flag) case CEPH_OSD_FLAG_IGNORE_REDIRECT: return "ignore_redirect"; case CEPH_OSD_FLAG_RETURNVEC: return "returnvec"; case CEPH_OSD_FLAG_SUPPORTSPOOLEIO: return "supports_pool_eio"; + case CEPH_OSD_FLAG_EC_DIRECT_READ: return "ec_direct_read"; default: return "???"; } } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index f0495c6e277..af58a9cb5e0 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -383,6 +383,7 @@ enum { CEPH_OSD_RMW_FLAG_RWORDERED = (1 << 10), CEPH_OSD_RMW_FLAG_RETURNVEC = (1 << 11), CEPH_OSD_RMW_FLAG_READ_DATA = (1 << 12), + CEPH_OSD_RMW_FLAG_EC_DIRECT_READ = (1 << 13), }; -- 2.39.5