From: Zhang Song
Date: Thu, 3 Jul 2025 10:09:17 +0000 (+0800)
Subject: crimson/os/seastore: introduce logical bucket and demote process
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5383e127727fd20ea4562d533f90a0ec726a0336;p=ceph-ci.git

crimson/os/seastore: introduce logical bucket and demote process

Signed-off-by: Zhang Song
Signed-off-by: Xuehan Xu
---

diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt
index 89a2ae0b5a6..dbac365a3b2 100644
--- a/src/crimson/os/seastore/CMakeLists.txt
+++ b/src/crimson/os/seastore/CMakeLists.txt
@@ -9,6 +9,7 @@ set(crimson_seastore_srcs
   transaction_manager.cc
   cache.cc
   extent_pinboard.cc
+  logical_bucket.cc
   root_block.cc
   lba_manager.cc
   async_cleaner.cc
diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h
index 623a1ccbfa0..a55bdf3574b 100644
--- a/src/crimson/os/seastore/async_cleaner.h
+++ b/src/crimson/os/seastore/async_cleaner.h
@@ -367,6 +367,29 @@ public:
     Transaction &t,
     CachedExtentRef extent) = 0;
 
+  /**
+   * demote_region
+   *
+   * Demote the logical extents promoted from the slower device and evict
+   * the extents to the cold tier under the given laddr prefix.
+   *
+   * max_proceed_size is the byte budget the caller grants this call.
+   * The result carries the bytes demoted and evicted; complete is what
+   * LogicalBucket uses to drop the prefix from its list of buckets.
+   */
+  struct demote_region_res_t {
+    std::size_t demoted_size = 0;
+    std::size_t evicted_size = 0;
+    bool complete = false;
+  };
+  using demote_region_iertr = base_iertr;
+  using demote_region_ret = demote_region_iertr::future<
+    demote_region_res_t>;
+  virtual demote_region_ret demote_region(
+    Transaction &t,
+    laddr_t prefix,
+    std::size_t max_proceed_size) = 0;
+
   /**
    * get_extents_if_live
    *
diff --git a/src/crimson/os/seastore/logical_bucket.cc b/src/crimson/os/seastore/logical_bucket.cc
new file mode 100644
index 00000000000..7f45ac870d8
--- /dev/null
+++ b/src/crimson/os/seastore/logical_bucket.cc
@@ -0,0 +1,260 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "seastar/core/metrics.hh"
+
+#include "crimson/common/coroutine.h"
+#include "crimson/os/seastore/logging.h"
+#include "crimson/os/seastore/logical_bucket.h"
+#include "crimson/os/seastore/transaction_manager.h"
+
+// NOTE(review): the target of the single #include that stood here was lost
+// in extraction; the headers below cover the names this file uses.
+#include <boost/unordered/unordered_flat_map.hpp>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <list>
+#include <memory>
+#include <vector>
+
+namespace crimson::os::seastore {
+
+SET_SUBSYS(seastore_cache);
+
+/**
+ * LogicalBucketCache
+ *
+ * Keeps laddr object prefixes ("buckets") in a recency-ordered list and
+ * demotes them through ExtentCallbackInterface::demote_region(), starting
+ * from the front of the list, i.e. the least recently touched buckets.
+ */
+class LogicalBucketCache : public LogicalBucket {
+public:
+  LogicalBucketCache(std::size_t memory_capacity,
+                     std::size_t proceed_size_per_cycle)
+    : memory_capacity(memory_capacity),
+      proceed_size_per_cycle(proceed_size_per_cycle) {
+    LOG_PREFIX(LogicalBucketCache);
+    INFO("init memory_capacity={}, proceed_size_per_cycle={}",
+         memory_capacity, proceed_size_per_cycle);
+    register_metrics();
+  }
+
+  ~LogicalBucketCache() {
+    clear();
+  }
+
+  /// Move the bucket to the back of the list, creating it if unknown.
+  void move_to_top(laddr_t laddr) final {
+    LOG_PREFIX(LogicalBucketCache::move_to_top);
+    assert(laddr != L_ADDR_NULL);
+    assert(laddr == laddr.get_object_prefix());
+    auto iter = index.find(laddr);
+    if (iter != index.end()) {
+      TRACE("find bucket: {}", iter->first);
+      lru.splice(lru.end(), lru, iter->second);
+    } else {
+      TRACE("create bucket: {}", laddr);
+      index.emplace(laddr, lru.emplace(lru.end(), laddr));
+    }
+  }
+
+  /// Forget the bucket; a bucket that is not tracked is ignored.
+  void remove(laddr_t laddr) final {
+    LOG_PREFIX(LogicalBucketCache::remove);
+    TRACE("laddr: {}", laddr);
+    assert(laddr != L_ADDR_NULL);
+    assert(laddr == laddr.get_object_prefix());
+    auto iter = index.find(laddr);
+    if (iter != index.end()) {
+      TRACE("remove bucket: {}", laddr);
+      lru.erase(iter->second);
+      index.erase(iter);
+    }
+  }
+
+  /// Whether the bucket is currently tracked.
+  bool is_cached(laddr_t laddr) final {
+    assert(laddr != L_ADDR_NULL);
+    assert(laddr == laddr.get_object_prefix());
+    return index.contains(laddr);
+  }
+
+  /// Drop every tracked bucket.
+  void clear() final {
+    index.clear();
+    lru.clear();
+  }
+
+  void set_background_callback(BackgroundListener *l) final {
+    listener = l;
+  }
+
+  void set_extent_callback(ExtentCallbackInterface *cb) final {
+    ecb = cb;
+  }
+
+  /// True when there is at least one bucket that demote() could process.
+  bool could_demote() const final {
+    return !lru.empty();
+  }
+
+  /// True when the estimated bookkeeping footprint exceeds memory_capacity.
+  bool should_demote() const final {
+    // lru element: laddr_t + pointer * 2
+    // index element: laddr_t + lru iterator(void*)
+    auto element_size = (sizeof(laddr_t) * 2 + sizeof(void*) * 3);
+    return element_size * lru.size() > memory_capacity;
+  }
+
+  using run_demote_iertr = base_iertr;
+  using run_demote_ret = run_demote_iertr::future<>;
+  /**
+   * run_demote
+   *
+   * One demote cycle on transaction t: take up to pending_buckets_target
+   * buckets from the front of the list, call demote_region() on each until
+   * proceed_size_per_cycle bytes have been processed, submit t, then drop
+   * the buckets reported complete and re-estimate pending_buckets_target.
+   */
+  run_demote_ret run_demote(Transaction &t) {
+    LOG_PREFIX(LogicalBucketCache::demote);
+    std::vector<laddr_t> pending_buckets;
+    std::vector<laddr_t> completed_buckets;
+    ceph_assert(pending_buckets_target > 0);
+    ceph_assert(!lru.empty());
+    // Copy the candidates so no lru iterator is held across a co_await.
+    const auto target = static_cast<std::size_t>(pending_buckets_target);
+    for (const auto &b : lru) {
+      if (pending_buckets.size() == target) {
+        break;
+      }
+      pending_buckets.push_back(b);
+    }
+
+    DEBUGT("start demote {} buckets", t, pending_buckets.size());
+    std::size_t demoted_size = 0;
+    std::size_t evicted_size = 0;
+    for (auto &bucket : pending_buckets) {
+      TRACET("start demote {}", t, bucket);
+      // The remaining budget cannot wrap: the loop exits below as soon as
+      // the processed bytes reach proceed_size_per_cycle.
+      auto res = co_await ecb->demote_region(
+        t,
+        bucket,
+        proceed_size_per_cycle - demoted_size - evicted_size);
+
+      TRACET("demote_size: {}, evicted_size: {}, complete: {}",
+             t, res.demoted_size, res.evicted_size, res.complete);
+      demoted_size += res.demoted_size;
+      evicted_size += res.evicted_size;
+      if (res.complete) {
+        completed_buckets.push_back(bucket);
+      }
+      if (demoted_size + evicted_size >= proceed_size_per_cycle) {
+        break;
+      }
+    }
+
+    co_await ecb->submit_transaction_direct(t);
+
+    DEBUGT("finish demoting {} buckets with {} bytes evicted and {} bytes demoted",
+           t, completed_buckets.size(), evicted_size, demoted_size);
+    stat.demoted_bucket_count += completed_buckets.size();
+    stat.demoted_size += demoted_size;
+    stat.evicted_size += evicted_size;
+    for (auto &p : completed_buckets) {
+      remove(p);
+    }
+    auto old_count = pending_buckets_target;
+    if (demoted_size != 0 && !completed_buckets.empty()) {
+      auto demote_ratio = static_cast<double>(demoted_size) /
+        static_cast<double>(completed_buckets.size());
+      assert(!std::isnan(demote_ratio));
+      // Clamp before narrowing: converting an out-of-range double to
+      // int32_t is undefined behavior.
+      const double next_target =
+        (proceed_size_per_cycle / demote_ratio) + 1;
+      pending_buckets_target = static_cast<int32_t>(std::min<double>(
+        next_target, std::numeric_limits<int32_t>::max()));
+    }
+    DEBUGT("update init buckets count {} -> {}",
+           t, old_count, pending_buckets_target);
+    co_return;
+  }
+
+  /// Run run_demote() in a DEMOTE transaction, wrapped in repeat_eagain().
+  seastar::future<> demote() final {
+    // Fail deterministically if set_extent_callback() was never called.
+    ceph_assert(ecb != nullptr);
+    return repeat_eagain([this] {
+      return ecb->with_transaction_intr(
+        Transaction::src_t::DEMOTE,
+        "demote", cache_hint_t::get_nocache(),
+        [this](auto &t) {
+          return run_demote(t);
+        });
+    }).handle_error(crimson::ct_error::assert_all{ "impossible" });
+  }
+
+private:
+  using laddr_lru_t = std::list<laddr_t>;
+  // Front: least recently touched bucket; back: most recently touched.
+  laddr_lru_t lru;
+  // Bucket prefix -> its node in lru.
+  boost::unordered_flat_map<laddr_t, laddr_lru_t::iterator> index;
+  // How many buckets a cycle considers; re-estimated by run_demote().
+  int32_t pending_buckets_target = 20;
+
+  void register_metrics() {
+    namespace sm = seastar::metrics;
+    metrics.add_group(
+      "cache",
+      {
+        sm::make_gauge(
+          "non_volatile_cache_buckets_count",
+          [this] { return lru.size(); },
+          sm::description("the count of laddr bucket used by non volatile cache")),
+        sm::make_counter(
+          "non_volatile_cache_evicted_size",
+          [this] { return stat.evicted_size; },
+          sm::description("total bytes of extents evicted by non volatile cache")),
+        sm::make_counter(
+          "non_volatile_cache_demoted_size",
+          [this] { return stat.demoted_size; },
+          sm::description("total bytes of extents demoted by non volatile cache")),
+        sm::make_counter(
+          "non_volatile_cache_demoted_bucket_count",
+          [this] { return stat.demoted_bucket_count; },
+          sm::description("the count of laddr bucket demoted by non volatile cache")),
+      });
+  }
+
+  struct {
+    uint64_t evicted_size = 0;
+    uint64_t demoted_size = 0;
+    uint64_t demoted_bucket_count = 0;
+  } stat;
+
+  seastar::metrics::metric_group metrics;
+
+  const std::size_t memory_capacity;
+  const std::size_t proceed_size_per_cycle;
+
+  // Null until the owner installs them through the setters.
+  ExtentCallbackInterface *ecb = nullptr;
+  BackgroundListener *listener = nullptr;
+};
+
+LogicalBucketRef create_logical_bucket(
+  std::size_t memory_capacity,
+  std::size_t proceed_size_per_cycle)
+{
+  return std::make_unique<LogicalBucketCache>(
+    memory_capacity, proceed_size_per_cycle);
+}
+
+}
diff --git a/src/crimson/os/seastore/logical_bucket.h b/src/crimson/os/seastore/logical_bucket.h
new file mode 100644
index 00000000000..efa2f21a4fc
--- /dev/null
+++ b/src/crimson/os/seastore/logical_bucket.h
@@ -0,0 +1,37 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab expandtab
+
+#pragma once
+
+#include <memory>
+
+#include <seastar/core/future.hh>
+
+#include "crimson/os/seastore/seastore_types.h"
+
+namespace crimson::os::seastore {
+class BackgroundListener;
+class ExtentCallbackInterface;
+
+/**
+ * LogicalBucket
+ *
+ * Interface for tracking laddr buckets and demoting them.
+ */
+struct LogicalBucket {
+  virtual ~LogicalBucket() = default;
+  virtual void move_to_top(laddr_t laddr) = 0;
+  virtual void remove(laddr_t laddr) = 0;
+  virtual bool is_cached(laddr_t laddr) = 0;
+  virtual void clear() = 0;
+  virtual void set_background_callback(BackgroundListener *listener) = 0;
+  virtual void set_extent_callback(ExtentCallbackInterface *cb) = 0;
+  virtual bool could_demote() const = 0;
+  virtual bool should_demote() const = 0;
+  virtual seastar::future<> demote() = 0;
+};
+using LogicalBucketRef = std::unique_ptr<LogicalBucket>;
+LogicalBucketRef create_logical_bucket(
+  std::size_t memory_capacity,
+  std::size_t proceed_size_per_cycle);
+}
diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc
index 2af3d2d2a1a..5479549190e 100644
--- a/src/crimson/os/seastore/transaction_manager.cc
+++ b/src/crimson/os/seastore/transaction_manager.cc
@@ -1001,6 +1001,17 @@ TransactionManager::promote_extent(
   return rewrite_extent_iertr::make_ready_future();
 }
 
+TransactionManager::demote_region_ret
+TransactionManager::demote_region(
+  Transaction &t,
+  laddr_t start,
+  std::size_t max_proceed_size)
+{
+  // TODO: implement; for now report that nothing was processed.
+  return demote_region_iertr::make_ready_future<demote_region_res_t>(
+    demote_region_res_t{0, 0, false});
+}
+
 TransactionManager::get_extents_if_live_ret
 TransactionManager::get_extents_if_live(
   Transaction &t,
diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h
index 1c94df8e927..5e5b1b64f21 100644
--- a/src/crimson/os/seastore/transaction_manager.h
+++ b/src/crimson/os/seastore/transaction_manager.h
@@ -905,6 +905,13 @@ public:
     Transaction &t,
     CachedExtentRef extent);
 
+  using ExtentCallbackInterface::demote_region_res_t;
+  using ExtentCallbackInterface::demote_region_ret;
+  demote_region_ret demote_region(
+    Transaction &t,
+    laddr_t start,
+    std::size_t max_proceed_size) final;
+
   using ExtentCallbackInterface::get_extents_if_live_ret;
   get_extents_if_live_ret get_extents_if_live(
     Transaction &t,