From df6054088978c8934fe61382775329e07f61a079 Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Mon, 7 Apr 2025 09:20:44 +0100 Subject: [PATCH] osd: Install stub extent cache in OSD. The extent cache in new EC is a per OSD-shard cache which caches reads used by read-modify-write to improve performance of sequential IO. We want to provide a single PR with all of EC in it, so this PR provides a non-functional stub to allow all the non-EC code to be installed. Signed-off-by: Alex Ainscow --- src/osd/ECBackend.cc | 3 ++- src/osd/ECBackend.h | 3 ++- src/osd/ECExtentCache.h | 10 ++++++++++ src/osd/ECSwitch.h | 5 +++-- src/osd/OSD.cc | 13 +++++++++++-- src/osd/OSD.h | 11 +++++++++++ src/osd/PGBackend.cc | 6 ++++-- src/osd/PGBackend.h | 4 +++- src/osd/PrimaryLogPG.cc | 5 +++-- src/osd/PrimaryLogPG.h | 3 ++- 10 files changed, 51 insertions(+), 12 deletions(-) create mode 100644 src/osd/ECExtentCache.h diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 47cb7c52657..63dfc99015d 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -128,7 +128,8 @@ ECBackend::ECBackend( CephContext *cct, ErasureCodeInterfaceRef ec_impl, uint64_t stripe_width, - ECSwitch *s) + ECSwitch *s, + ECExtentCache::LRU &ignored) : parent(pg), cct(cct), switcher(s), read_pipeline(cct, ec_impl, this->sinfo, get_parent()->get_eclistener()), rmw_pipeline(cct, ec_impl, this->sinfo, get_parent()->get_eclistener(), *this), diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index 715eb97c743..b11b946183f 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -433,7 +433,8 @@ public: CephContext *cct, ceph::ErasureCodeInterfaceRef ec_impl, uint64_t stripe_width, - ECSwitch *s); + ECSwitch *s, + ECExtentCache::LRU &ignored); int objects_get_attrs( const hobject_t &hoid, diff --git a/src/osd/ECExtentCache.h b/src/osd/ECExtentCache.h new file mode 100644 index 00000000000..b02afec4a11 --- /dev/null +++ b/src/osd/ECExtentCache.h @@ -0,0 +1,10 @@ +#pragma once + +// Temporary stubs 
+class ECExtentCache { + public: + class LRU { + public: + LRU(uint64_t) {} + }; +}; diff --git a/src/osd/ECSwitch.h b/src/osd/ECSwitch.h index 4069662ade0..c643b7e5cd4 100644 --- a/src/osd/ECSwitch.h +++ b/src/osd/ECSwitch.h @@ -40,10 +40,11 @@ public: ObjectStore *store, CephContext *cct, ceph::ErasureCodeInterfaceRef ec_impl, - uint64_t stripe_width) : + uint64_t stripe_width, + ECExtentCache::LRU &lru) : PGBackend(cct, pg, store, coll, ch), legacy(pg, cct, ec_impl, stripe_width, this), - optimized(pg, cct, ec_impl, stripe_width, this), + optimized(pg, cct, ec_impl, stripe_width, this, lru), is_optimized_actual(get_parent()->get_pool().allows_ecoptimizations()) {} bool is_optimized() const diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 2ea6d25f442..dc68548c159 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -5220,7 +5220,7 @@ PG* OSD::_make_pg( PG *pg; if (pi.type == pg_pool_t::TYPE_REPLICATED || pi.type == pg_pool_t::TYPE_ERASURE) - pg = new PrimaryLogPG(&service, createmap, pool, ec_profile, pgid); + pg = new PrimaryLogPG(&service, createmap, pool, ec_profile, pgid, lookup_ec_extent_cache_lru(pgid)); else ceph_abort(); return pg; @@ -5307,6 +5307,13 @@ bool OSD::try_finish_pg_delete(PG *pg, unsigned old_pg_num) return true; } +ECExtentCache::LRU &OSD::lookup_ec_extent_cache_lru(spg_t pgid) const +{ + uint32_t shard_index = pgid.hash_to_shard(num_shards); + auto sdata = shards[shard_index]; + return sdata->ec_extent_cache_lru; +} + PGRef OSD::_lookup_pg(spg_t pgid) { uint32_t shard_index = pgid.hash_to_shard(num_shards); @@ -11066,7 +11073,9 @@ OSDShard::OSDShard( scheduler(ceph::osd::scheduler::make_scheduler( cct, osd->whoami, osd->num_shards, id, osd->store->is_rotational(), osd->store->get_type(), osd_op_queue, osd_op_queue_cut_off, osd->monc)), - context_queue(sdata_wait_lock, sdata_cond) + context_queue(sdata_wait_lock, sdata_cond), + ec_extent_cache_lru(cct->_conf.get_val( + "ec_extent_cache_size")) { dout(0) << "using op scheduler " << 
*scheduler << dendl; } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 1d41a44f917..84f70c86a40 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1021,6 +1021,11 @@ struct OSDShard { ContextQueue context_queue; + //This is an extent cache for the erasure coding. Specifically, this acts as + //a least-recently-used cache invalidator, allowing for cache shards to last + //longer than the most recent IO in each object. + ECExtentCache::LRU ec_extent_cache_lru; + void _attach_pg(OSDShardPGSlot *slot, PG *pg); void _detach_pg(OSDShardPGSlot *slot); @@ -1206,6 +1211,12 @@ public: */ static CompatSet get_osd_compat_set(); + /** + * lookup_ec_extent_cache_lru() + * @param pgid - + * @return extent cache for LRU + */ + ECExtentCache::LRU &lookup_ec_extent_cache_lru(spg_t pgid) const; private: class C_Tick; diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index 19ac1ab70ea..f7eeb225e61 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -739,7 +739,8 @@ PGBackend *PGBackend::build_pg_backend( coll_t coll, ObjectStore::CollectionHandle &ch, ObjectStore *store, - CephContext *cct) + CephContext *cct, + ECExtentCache::LRU &ec_extent_cache_lru) { ErasureCodeProfile ec_profile = profile; switch (pool.type) { @@ -763,7 +764,8 @@ PGBackend *PGBackend::build_pg_backend( store, cct, ec_impl, - pool.stripe_width); + pool.stripe_width, + ec_extent_cache_lru); } default: ceph_abort(); diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index c809469ae50..a69a7df0c22 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -20,6 +20,7 @@ #include "ECListener.h" #include "ECTypes.h" +#include "ECExtentCache.h" #include "osd_types.h" #include "pg_features.h" #include "common/intrusive_timer.h" @@ -618,7 +619,8 @@ typedef std::shared_ptr OSDMapRef; coll_t coll, ObjectStore::CollectionHandle &ch, ObjectStore *store, - CephContext *cct); + CephContext *cct, + ECExtentCache::LRU &ec_extent_cache_lru); }; #endif diff --git a/src/osd/PrimaryLogPG.cc 
b/src/osd/PrimaryLogPG.cc index 1340ec88679..3dfc0302994 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -1772,11 +1772,12 @@ void PrimaryLogPG::release_object_locks( PrimaryLogPG::PrimaryLogPG(OSDService *o, OSDMapRef curmap, const PGPool &_pool, - const map& ec_profile, spg_t p) : + const map& ec_profile, spg_t p, + ECExtentCache::LRU &ec_extent_cache_lru) : PG(o, curmap, _pool, p), pgbackend( PGBackend::build_pg_backend( - _pool.info, ec_profile, this, coll_t(p), ch, o->store, cct)), + _pool.info, ec_profile, this, coll_t(p), ch, o->store, cct, ec_extent_cache_lru)), object_contexts(o->cct, o->cct->_conf->osd_pg_object_context_cache_count), new_backfill(false), temp_seq(0), diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index b365cf29457..c1c92e0353a 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -1528,7 +1528,8 @@ public: PrimaryLogPG(OSDService *o, OSDMapRef curmap, const PGPool &_pool, const std::map& ec_profile, - spg_t p); + spg_t p, + ECExtentCache::LRU &ec_extent_cache_lru); ~PrimaryLogPG() override; void do_command( -- 2.39.5