From: Alex Ainscow Date: Fri, 3 Oct 2025 13:00:10 +0000 (+0100) Subject: osd: Create EC Direct Read flag and pass through to EC. X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d9f4ee459667d25ccc0fdcf61eea18d862d5656b;p=ceph.git osd: Create EC Direct Read flag and pass through to EC. This is in preparation for supporting sparse and sync reads in EC. Such ops will only be supported for "balance reads". Signed-off-by: Alex Ainscow --- diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 56f1120dd6d7..dd77f9601a1f 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -1615,8 +1615,7 @@ bool PG::can_discard_op(const MOSDOp& m) const { return true; } - if ((m.get_flags() & (CEPH_OSD_FLAG_BALANCE_READS | - CEPH_OSD_FLAG_LOCALIZE_READS)) + if ((m.get_flags() & CEPH_OSD_FLAGS_DIRECT_READ) && !is_primary() && (m.get_map_epoch() < peering_state.get_info().history.same_interval_since)) diff --git a/src/include/rados.h b/src/include/rados.h index e4c58faf59c8..65092d42328b 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -481,8 +481,12 @@ enum { CEPH_OSD_FLAG_IGNORE_REDIRECT = 0x2000000, /* ignore redirection */ CEPH_OSD_FLAG_RETURNVEC = 0x4000000, /* allow overall result >= 0, and return >= 0 and buffer for each op in opvec */ CEPH_OSD_FLAG_SUPPORTSPOOLEIO = 0x8000000, /* client understands pool EIO flag */ + CEPH_OSD_FLAG_EC_DIRECT_READ = 0x10000000, /* Erasure code doing a partial read direct to OSD. */ }; +// Indicates an IO which is direct-to-OSD and may not be on the primary. 
+#define CEPH_OSD_FLAGS_DIRECT_READ (CEPH_OSD_FLAG_BALANCE_READS | CEPH_OSD_FLAG_LOCALIZE_READS | CEPH_OSD_FLAG_EC_DIRECT_READ) + enum { CEPH_OSD_OP_FLAG_EXCL = 0x1, /* EXCL object create */ CEPH_OSD_OP_FLAG_FAILOK = 0x2, /* continue despite failure */ diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h index 2766a7aded5e..255aa59d5307 100644 --- a/src/osd/OpRequest.h +++ b/src/osd/OpRequest.h @@ -48,6 +48,8 @@ public: bool need_skip_handle_cache() const { return op_info.need_skip_handle_cache(); } bool need_skip_promote() const { return op_info.need_skip_promote(); } bool allows_returnvec() const { return op_info.allows_returnvec(); } + bool ec_direct_read() const { return op_info.ec_direct_read(); } + void set_ec_direct_read() { return op_info.set_ec_direct_read(); } std::vector classes() const { return op_info.get_classes(); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 0c0e95c55972..95a7c26cf8f1 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1940,8 +1940,7 @@ bool PG::can_discard_op(OpRequestRef& op) return true; } - if ((m->get_flags() & (CEPH_OSD_FLAG_BALANCE_READS | - CEPH_OSD_FLAG_LOCALIZE_READS)) && + if ((m->get_flags() & CEPH_OSD_FLAGS_DIRECT_READ) && !is_primary() && m->get_map_epoch() < info.history.same_interval_since) { // Note: the Objecter will resend on interval change without the primary diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 4f28cbbc6e15..d5afb804d7e9 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2047,16 +2047,22 @@ void PrimaryLogPG::do_op(OpRequestRef& op) } // check for op with rwordered and rebalance or localize reads - if ((m->has_flag(CEPH_OSD_FLAG_BALANCE_READS) || m->has_flag(CEPH_OSD_FLAG_LOCALIZE_READS)) && - op->rwordered()) { + if (m->has_flag(CEPH_OSD_FLAGS_DIRECT_READ) && op->rwordered()) { dout(4) << __func__ << ": rebelance or localized reads with rwordered not allowed " << *m << dendl; osd->reply_op_error(op, -EINVAL); return; } - if ((m->get_flags() & 
(CEPH_OSD_FLAG_BALANCE_READS | - CEPH_OSD_FLAG_LOCALIZE_READS)) && + if (m->get_flags() & CEPH_OSD_FLAG_EC_DIRECT_READ) { + if (is_primary() || is_nonprimary()) { + op->set_ec_direct_read(); + } else { + osd->handle_misdirected_op(this, op); + return; + } + } else if ((m->get_flags() & (CEPH_OSD_FLAG_BALANCE_READS | + CEPH_OSD_FLAG_LOCALIZE_READS)) && op->may_read() && !(op->may_write() || op->may_cache())) { // balanced reads; any replica will do diff --git a/src/osd/osd_op_util.cc b/src/osd/osd_op_util.cc index b0fbe8f370a5..2c2ad8e3ec90 100644 --- a/src/osd/osd_op_util.cc +++ b/src/osd/osd_op_util.cc @@ -52,6 +52,9 @@ bool OpInfo::need_skip_promote() const { bool OpInfo::allows_returnvec() const { return check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC); } +bool OpInfo::ec_direct_read() const { + return check_rmw(CEPH_OSD_RMW_FLAG_EC_DIRECT_READ); +} /** * may_read_data() * @@ -79,6 +82,7 @@ void OpInfo::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); void OpInfo::set_force_rwordered() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED); } void OpInfo::set_returnvec() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC); } void OpInfo::set_read_data() { set_rmw_flags(CEPH_OSD_RMW_FLAG_READ_DATA); } +void OpInfo::set_ec_direct_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_EC_DIRECT_READ); } int OpInfo::set_from_op( diff --git a/src/osd/osd_op_util.h b/src/osd/osd_op_util.h index 118934206d8c..ba1acae4c9e4 100644 --- a/src/osd/osd_op_util.h +++ b/src/osd/osd_op_util.h @@ -59,6 +59,7 @@ public: bool need_skip_handle_cache() const; bool need_skip_promote() const; bool allows_returnvec() const; + bool ec_direct_read() const; void set_read(); void set_write(); @@ -72,6 +73,7 @@ public: void set_force_rwordered(); void set_returnvec(); void set_read_data(); + void set_ec_direct_read(); int set_from_op( const MOSDOp *m, diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 3e51bed17be5..d3b39355d87f 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -103,6 
+103,7 @@ const char *ceph_osd_flag_name(unsigned flag) case CEPH_OSD_FLAG_IGNORE_REDIRECT: return "ignore_redirect"; case CEPH_OSD_FLAG_RETURNVEC: return "returnvec"; case CEPH_OSD_FLAG_SUPPORTSPOOLEIO: return "supports_pool_eio"; + case CEPH_OSD_FLAG_EC_DIRECT_READ: return "ec_direct_read"; default: return "???"; } } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index dc651e8ad15f..679aaa7042e1 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -383,6 +383,7 @@ enum { CEPH_OSD_RMW_FLAG_RWORDERED = (1 << 10), CEPH_OSD_RMW_FLAG_RETURNVEC = (1 << 11), CEPH_OSD_RMW_FLAG_READ_DATA = (1 << 12), + CEPH_OSD_RMW_FLAG_EC_DIRECT_READ = (1 << 13), };