From: Radoslaw Zarzynski Date: Thu, 27 Nov 2025 15:28:58 +0000 (+0000) Subject: crimson/osd: implement handling of RADOS' extended attributes in FastEC X-Git-Tag: v21.0.0~3^2~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c63c64c6afe9d1bf5fd15b5ac51918fb9fa66f55;p=ceph.git crimson/osd: implement handling of RADOS' extended attributes in FastEC Support for `attr_cache` in replicated backend is planned for later. Signed-off-by: Radoslaw Zarzynski --- diff --git a/src/crimson/osd/ec_backend.cc b/src/crimson/osd/ec_backend.cc index 7d7dadcbc613..da2082db3ce0 100644 --- a/src/crimson/osd/ec_backend.cc +++ b/src/crimson/osd/ec_backend.cc @@ -790,4 +790,13 @@ ECBackend::handle_rep_read_reply(ECSubReadReply& mop) return ll_read_ierrorator::now(); } +PGBackend::get_attr_ierrorator::future +ECBackend::getxattr( + const hobject_t& soid, + std::string&& key) const +{ + // ENOSUPP! ECBackend reads xattr solely from the ObjectContext::attr_cache + return crimson::ct_error::enodata::make(); +} + } // namespace crimson::osd diff --git a/src/crimson/osd/ec_backend.h b/src/crimson/osd/ec_backend.h index 1885ad92d91e..8a13cff2ae5f 100644 --- a/src/crimson/osd/ec_backend.h +++ b/src/crimson/osd/ec_backend.h @@ -52,6 +52,10 @@ public: ll_read_ierrorator::future<> handle_rep_read_reply(ECSubReadReply& mop); ll_read_ierrorator::future<> handle_rep_read_reply(Ref); + PGBackend::get_attr_ierrorator::future getxattr( + const hobject_t& soid, + std::string&& key) const final; + private: friend class ECRecoveryBackend; @@ -118,6 +122,8 @@ private: ECCommon::ReadPipeline read_pipeline; ECCommon::RMWPipeline rmw_pipeline; + + bool is_erasure() const override { return true; } }; } diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index 3714913a7253..1845cb179b3d 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -76,7 +76,8 @@ public: std::map> watchers; // attr cache. 
ECTransaction is the initial user - std::map> attr_cache; + using attr_cache_t = std::map>; + attr_cache_t attr_cache; CommonOBCPipeline obc_pipeline; diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index 3b6819278aff..082c12ca92b7 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -472,6 +472,17 @@ auto OpsExecuter::do_const_op(Func&& f) { return std::forward(f)(pg->get_backend(), std::as_const(obc->obs)); } +template +auto OpsExecuter::do_read_attr_cache(Func&& f) { + ++num_read; + // TODO: pass backend as read-only + return std::invoke( + std::forward(f), + pg->get_backend(), + std::as_const(obc->attr_cache), + std::as_const(obc->obs)); +} + // Defined here because there is a circular dependency between OpsExecuter and PG template auto OpsExecuter::do_write_op(Func&& f, OpsExecuter::modified_by m) { @@ -479,6 +490,12 @@ auto OpsExecuter::do_write_op(Func&& f, OpsExecuter::modified_by m) { check_init_op_params(m); return std::forward(f)(pg->get_backend(), obc->obs, txn); } +template +auto OpsExecuter::do_write_op_attr_cache(Func&& f, OpsExecuter::modified_by m) { + ++num_write; + check_init_op_params(m); + return std::forward(f)(pg->get_backend(), obc->obs, txn, obc->attr_cache); +} OpsExecuter::call_errorator::future<> OpsExecuter::do_assert_ver( OSDOp& osd_op, const ObjectState& os) @@ -617,20 +634,24 @@ OpsExecuter::do_execute_op(OSDOp& osd_op) return backend.cmp_ext(os, osd_op); }); case CEPH_OSD_OP_GETXATTR: - return do_read_op([this, &osd_op](auto& backend, const auto& os) { - return backend.getxattr(os, osd_op, delta_stats); + return do_read_attr_cache([this, &osd_op](auto& backend, + const auto& attr_cache, + const auto& os) { + return backend.getxattr(os, attr_cache, osd_op, delta_stats); }); case CEPH_OSD_OP_GETXATTRS: - return do_read_op([this, &osd_op](auto& backend, const auto& os) { - return backend.get_xattrs(os, osd_op, delta_stats); + return do_read_attr_cache([this, &osd_op](auto& 
backend, + const auto& attr_cache, + const auto& os) { + return backend.get_xattrs(os, attr_cache, osd_op, delta_stats); }); case CEPH_OSD_OP_CMPXATTR: return do_read_op([this, &osd_op](auto& backend, const auto& os) { return backend.cmp_xattr(os, osd_op, delta_stats); }); case CEPH_OSD_OP_RMXATTR: - return do_write_op([&osd_op](auto& backend, auto& os, auto& txn) { - return backend.rm_xattr(os, osd_op, txn); + return do_write_op_attr_cache([&osd_op](auto& backend, auto& os, auto& txn, auto& attr_cache) { + return backend.rm_xattr(os, osd_op, txn, attr_cache); }); case CEPH_OSD_OP_CREATE: return do_write_op([this, &osd_op](auto& backend, auto& os, auto& txn) { @@ -675,8 +696,8 @@ OpsExecuter::do_execute_op(OSDOp& osd_op) return backend.set_allochint(os, osd_op, txn, delta_stats); }); case CEPH_OSD_OP_SETXATTR: - return do_write_op([this, &osd_op](auto& backend, auto& os, auto& txn) { - return backend.setxattr(os, osd_op, txn, delta_stats); + return do_write_op_attr_cache([this, &osd_op](auto& backend, auto& os, auto& txn, auto& attr_cache) { + return backend.setxattr(os, osd_op, txn, delta_stats, attr_cache); }); case CEPH_OSD_OP_DELETE: { diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h index 00d9b1883da6..3cb7f93a4d4d 100644 --- a/src/crimson/osd/ops_executer.h +++ b/src/crimson/osd/ops_executer.h @@ -336,6 +336,9 @@ private: return do_const_op(std::forward(f)); } + template + auto do_read_attr_cache(Func&& f); + template auto do_snapset_op(Func&& f) { ++num_read; @@ -352,6 +355,8 @@ private: template auto do_write_op(Func&& f, modified_by m = modified_by::user); + template + auto do_write_op_attr_cache(Func&& f, modified_by m = modified_by::user); decltype(auto) dont_do_legacy_op() { return crimson::ct_error::operation_not_supported::make(); diff --git a/src/crimson/osd/pg_backend.cc b/src/crimson/osd/pg_backend.cc index 7f6137a55f01..5bf96d612046 100644 --- a/src/crimson/osd/pg_backend.cc +++ b/src/crimson/osd/pg_backend.cc @@ 
-1111,7 +1111,8 @@ PGBackend::setxattr_ierrorator::future<> PGBackend::setxattr( ObjectState& os, const OSDOp& osd_op, ceph::os::Transaction& txn, - object_stat_sum_t& delta_stats) + object_stat_sum_t& delta_stats, + ObjectContext::attr_cache_t& attr_cache) { if (local_conf()->osd_max_attr_size > 0 && osd_op.op.xattr.value_len > local_conf()->osd_max_attr_size) { @@ -1137,6 +1138,7 @@ PGBackend::setxattr_ierrorator::future<> PGBackend::setxattr( } logger().debug("setxattr on obj={} for attr={}", os.oi.soid, name); txn.setattr(coll->get_cid(), ghobject_t{os.oi.soid}, name, val); + attr_cache[name] = val; delta_stats.num_wr++; return setxattr_ierrorator::future<>(seastar::now()); }); @@ -1155,8 +1157,20 @@ PGBackend::get_attr_ierrorator::future<> PGBackend::getxattr( bp.copy(osd_op.op.xattr.name_len, aname); name = "_" + aname; } - logger().debug("getxattr on obj={} for attr={}", os.oi.soid, name); - return getxattr(os.oi.soid, std::move(name)).safe_then_interruptible( + auto get_attr_maybe_from_cache = + [&] () mutable -> get_attr_ierrorator::future { + if (!is_erasure()) { + logger().debug("getxattr on obj={} for attr={}", os.oi.soid, name); + return getxattr(os.oi.soid, std::move(name)); + } + if (auto cache_it = attr_cache.find(name); cache_it != std::end(attr_cache)) { + return get_attr_ierrorator::make_ready_future( + cache_it->second); + } + logger().debug("getxattr on obj={} for attr={}", os.oi.soid, name); + return crimson::ct_error::enodata::make(); + }; + return get_attr_maybe_from_cache().safe_then_interruptible( [&delta_stats, &osd_op] (ceph::bufferlist&& val) { osd_op.outdata = std::move(val); osd_op.op.xattr.value_len = osd_op.outdata.length(); @@ -1166,24 +1180,22 @@ PGBackend::get_attr_ierrorator::future<> PGBackend::getxattr( }); } -PGBackend::get_attr_ierrorator::future -PGBackend::getxattr( - const hobject_t& soid, - std::string&& key) const -{ - return seastar::do_with(key, [this, &soid](auto &key) { - return 
crimson::os::with_store<&crimson::os::FuturizedStore::Shard::get_attr>( - store, coll, ghobject_t{soid}, key, 0); - }); -} - PGBackend::get_attr_ierrorator::future<> PGBackend::get_xattrs( const ObjectState& os, + const ObjectContext::attr_cache_t& attr_cache, OSDOp& osd_op, object_stat_sum_t& delta_stats) const { - return crimson::os::with_store<&crimson::os::FuturizedStore::Shard::get_attrs>( - store, coll, ghobject_t{os.oi.soid}, 0).safe_then( + auto get_attrs_maybe_from_cache = + [&] () { + if (!is_erasure()) { + logger().debug("getxattrx on obj={} goes into objstore", os.oi.soid); + return store->get_attrs(coll, ghobject_t{os.oi.soid}); + } + return crimson::os::FuturizedStore::Shard::get_attrs_ertr::make_ready_future< + crimson::os::FuturizedStore::Shard::attrs_t>(attr_cache); + }; + return get_attrs_maybe_from_cache().safe_then( [&delta_stats, &osd_op](auto&& attrs) { std::vector> user_xattrs; ceph::bufferlist bl; @@ -1312,7 +1324,8 @@ PGBackend::rm_xattr_iertr::future<> PGBackend::rm_xattr( ObjectState& os, const OSDOp& osd_op, - ceph::os::Transaction& txn) + ceph::os::Transaction& txn, + ObjectContext::attr_cache_t& attr_cache) { if (!os.exists || os.oi.is_whiteout()) { logger().debug("{}: {} DNE", __func__, os.oi.soid); @@ -1322,6 +1335,7 @@ PGBackend::rm_xattr( string attr_name{"_"}; bp.copy(osd_op.op.xattr.name_len, attr_name); txn.rmattr(coll->get_cid(), ghobject_t{os.oi.soid}, attr_name); + attr_cache.erase(attr_name); return rm_xattr_iertr::now(); } diff --git a/src/crimson/osd/pg_backend.h b/src/crimson/osd/pg_backend.h index 3f4f7c9de854..bda7d73ac1b9 100644 --- a/src/crimson/osd/pg_backend.h +++ b/src/crimson/osd/pg_backend.h @@ -272,7 +272,8 @@ public: ObjectState& os, const OSDOp& osd_op, ceph::os::Transaction& trans, - object_stat_sum_t& delta_stats); + object_stat_sum_t& delta_stats, + ObjectContext::attr_cache_t& attr_cache); using get_attr_errorator = crimson::os::FuturizedStore::Shard::get_attr_errorator; using get_attr_ierrorator = 
::crimson::interruptible::interruptible_errorator< @@ -280,13 +281,15 @@ public: get_attr_errorator>; get_attr_ierrorator::future<> getxattr( const ObjectState& os, + const ObjectContext::attr_cache_t& attr_cache, OSDOp& osd_op, object_stat_sum_t& delta_stats) const; - get_attr_ierrorator::future getxattr( + virtual get_attr_ierrorator::future getxattr( const hobject_t& soid, - std::string&& key) const; + std::string&& key) const = 0; get_attr_ierrorator::future<> get_xattrs( const ObjectState& os, + const ObjectContext::attr_cache_t& attr_cache, OSDOp& osd_op, object_stat_sum_t& delta_stats) const; using cmp_xattr_errorator = get_attr_errorator::extend< @@ -308,7 +311,8 @@ public: rm_xattr_iertr::future<> rm_xattr( ObjectState& os, const OSDOp& osd_op, - ceph::os::Transaction& trans); + ceph::os::Transaction& trans, + ObjectContext::attr_cache_t& attr_cache); interruptible_future stat( CollectionRef c, const ghobject_t& oid) const; @@ -528,6 +532,8 @@ private: boost::container::flat_set temp_contents; friend class RecoveryBackend; + + virtual bool is_erasure() const { return false; } }; } diff --git a/src/crimson/osd/replicated_backend.cc b/src/crimson/osd/replicated_backend.cc index 1ef711c1f583..a3dbfd52b522 100644 --- a/src/crimson/osd/replicated_backend.cc +++ b/src/crimson/osd/replicated_backend.cc @@ -364,4 +364,14 @@ void ReplicatedBackend::do_pct(const MOSDPGPCT &m) pg.peering_state.update_pct(m.pg_committed_to); } +PGBackend::get_attr_ierrorator::future +ReplicatedBackend::getxattr( + const hobject_t& soid, + std::string&& key) const +{ + return seastar::do_with(key, [this, &soid](auto &key) { + return store->get_attr(coll, ghobject_t{soid}, key); + }); +} + } diff --git a/src/crimson/osd/replicated_backend.h b/src/crimson/osd/replicated_backend.h index 7bf067ffc71c..a726699ac004 100644 --- a/src/crimson/osd/replicated_backend.h +++ b/src/crimson/osd/replicated_backend.h @@ -34,6 +34,11 @@ public: void got_rep_op_reply(const MOSDRepOpReply& reply) final; 
seastar::future<> stop() final; void on_actingset_changed(bool same_primary) final; + + PGBackend::get_attr_ierrorator::future getxattr( + const hobject_t& soid, + std::string&& key) const final; + private: ll_read_ierrorator::future _read(const hobject_t& hoid,