#include "crimson/os/seastore/extent_pinboard.h"
#include "crimson/os/seastore/transaction.h"
+#include "crimson/os/seastore/transaction_manager.h"
#include <boost/unordered/unordered_flat_map.hpp>
last_overall_io = overall_io;
}
+class ExtentPromoter {
+public:
+ ExtentPromoter(size_t promotion_size, ExtentPlacementManager &epm)
+ : promotion_size(promotion_size), epm(epm) {}
+
+ ~ExtentPromoter() {
+ clear();
+ }
+
+ bool enabled() const {
+ return ecb != nullptr;
+ }
+
+ bool should_promote_extent(const CachedExtent &extent) {
+ return enabled() && epm.is_cold_device(extent.get_paddr().get_device_id());
+ }
+
+ size_t get_promotion_size() const {
+ return current_contents;
+ }
+
+ void set_background_callback(BackgroundListener *l) {
+ listener = l;
+ }
+
+ void set_extent_callback(ExtentCallbackInterface *cb) {
+ ecb = cb;
+ }
+
+ bool should_run_promote() const {
+ return enabled() && current_contents >= promotion_size;
+ }
+
+ std::size_t get_promoted_size() const {
+ return promoted_size;
+ }
+
+ std::size_t get_promoted_count() const {
+ return promoted_count;
+ }
+
+ void add_extent(CachedExtent &extent) {
+ assert(!extent.is_linked_to_list());
+ assert(extent.is_stable_clean());
+ extent.set_pin_state(extent_pin_state_t::PendingPromote);
+ list.push_back(extent);
+ current_contents += extent.get_length();
+ intrusive_ptr_add_ref(&extent);
+ while (current_contents > promotion_size) {
+ remove_extent(list.front(), extent_pin_state_t::Fresh);
+ }
+ if (should_run_promote()) {
+ // TODO: wake promote background process
+ }
+ }
+
+ void remove_extent(CachedExtent &extent, extent_pin_state_t new_state) {
+ assert(extent.is_linked_to_list());
+ assert(extent.get_pin_state() == extent_pin_state_t::PendingPromote);
+ assert(current_contents >= extent.get_length());
+ extent.set_pin_state(new_state);
+ list.erase(list.s_iterator_to(extent));
+ current_contents -= extent.get_length();
+ intrusive_ptr_release(&extent);
+ }
+
+ void clear() {
+ for (auto iter = list.begin(); iter != list.end();) {
+ remove_extent(*(iter++), extent_pin_state_t::Fresh);
+ }
+ }
+
+ using run_promote_ret = base_iertr::future<>;
+ run_promote_ret run_promote(Transaction &t) {
+ LOG_PREFIX(ExtentPromoter::run_promote);
+ std::size_t promote_size = 0;
+ std::list<CachedExtentRef> extents;
+ DEBUGT("start promote", t);
+ for (auto &extent : list) {
+ DEBUGT("promote {} to the hot tier", t, extent);
+ ceph_assert(extent.is_stable_clean());
+ ceph_assert(extent.get_pin_state() == extent_pin_state_t::PendingPromote);
+ promote_size += extent.get_length();
+ t.add_to_read_set(&extent);
+ extents.emplace_back(&extent);
+ }
+ for (auto &extent : extents) {
+ remove_extent(*extent, extent_pin_state_t::Fresh);
+ }
+ for (auto &extent : extents) {
+ co_await trans_intr::make_interruptible(extent->wait_io());
+ co_await ecb->promote_extent(t, extent);
+ }
+ // existing extents in lru will be retired after transaction submitted
+ co_await ecb->submit_transaction_direct(t);
+ promoted_count += extents.size();
+ promoted_size += promote_size;
+ DEBUGT("finish promoting {} {}B extents", t, extents.size(), promote_size);
+ co_return;
+ }
+
+ seastar::future<> promote() {
+ assert(enabled());
+ return repeat_eagain([this] {
+ return ecb->with_transaction_intr(
+ Transaction::src_t::PROMOTE,
+ "promote", cache_hint_t::get_nocache(),
+ [this](auto &t) {
+ return run_promote(t);
+ });
+ }).handle_error(crimson::ct_error::assert_all{"error occupied during promotion"});
+ }
+
+private:
+ const size_t promotion_size;
+ ExtentPlacementManager &epm;
+ ExtentCallbackInterface *ecb = nullptr;
+ BackgroundListener *listener = nullptr;
+ CachedExtent::primary_ref_list list;
+ size_t current_contents;
+
+ size_t promoted_count;
+ size_t promoted_size;
+};
+
class ExtentPinboardLRU : public ExtentPinboard {
ExtentQueue lru;
+ ExtentPromoter promoter;
seastar::metrics::metric_group metrics;
// hit and miss indicates if an extent is linked when touching it
uint64_t miss = 0;
public:
- ExtentPinboardLRU(std::size_t capacity) : lru(capacity) {
+ ExtentPinboardLRU(std::size_t capacity, size_t promotion_size, ExtentPlacementManager &epm)
+ : lru(capacity), promoter(promotion_size, epm) {
LOG_PREFIX(ExtentPinboardLRU::ExtentPinboardLRU);
- INFO("created, lru_capacity=0x{:x}B", capacity);
+ INFO("created, lru_capacity=0x{:x}B, promotion_size=0x{:x}B",
+ capacity, promotion_size);
}
std::size_t get_capacity_bytes() const {
),
}
);
+ if (promoter.enabled()) {
+ metrics.add_group(
+ "cache",
+ {
+ sm::make_counter(
+ "promoted_size",
+ [this] {
+ return promoter.get_promoted_size();
+ },
+ sm::description("total bytes promoted by the lru")),
+ sm::make_counter(
+ "promoted_count",
+ [this] {
+ return promoter.get_promoted_count();
+ },
+ sm::description("total extents promoted by the lru")),
+ });
+ }
}
void get_stats(
}
void remove(CachedExtent &extent) final {
+ auto s = extent.get_pin_state();
if (extent.is_linked_to_list()) {
- lru.remove(extent);
+ if (s == extent_pin_state_t::Fresh) {
+ lru.remove(extent);
+ } else {
+ promoter.remove_extent(extent, extent_pin_state_t::Fresh);
+ }
+ } else {
+ ceph_assert(s == extent_pin_state_t::Fresh);
+ }
+ }
+
+ void add_to_top(
+ CachedExtent &extent,
+ const Transaction::src_t* p_src) {
+ auto trimmed = lru.add_to_top(extent, p_src);
+ if (promoter.enabled()) {
+ for (auto &extent : trimmed) {
+ if (promoter.should_promote_extent(*extent)) {
+ promoter.add_extent(*extent);
+ }
+ }
}
}
extent_len_t /*load_start*/,
extent_len_t /*load_length*/) final {
if (extent.is_linked_to_list()) {
- lru.move_to_top(extent, p_src);
+ auto s = extent.get_pin_state();
+ assert(s <= extent_pin_state_t::PendingPromote);
+ if (s == extent_pin_state_t::Fresh) {
+ lru.move_to_top(extent, p_src);
+ } else {
+ promoter.remove_extent(extent, extent_pin_state_t::Fresh);
+ add_to_top(extent, p_src);
+ }
hit++;
} else {
- lru.add_to_top(extent, p_src);
+ add_to_top(extent, p_src);
miss++;
}
}
CachedExtent &extent,
extent_len_t increased_length,
const Transaction::src_t* p_src) final {
- if (extent.is_linked_to_list()) {
+ if (extent.is_linked_to_list() &&
+ extent.get_pin_state() == extent_pin_state_t::Fresh) {
lru.increase_cached_size(extent, increased_length, p_src);
+ } else {
+ // promoter take the complete extent size for content size calculation
+ assert(extent.get_pin_state() <= extent_pin_state_t::PendingPromote);
}
}
void clear() final {
lru.clear();
+ promoter.clear();
+ }
+
+ void set_background_callback(BackgroundListener *listener) final {
+ promoter.set_background_callback(listener);
+ }
+
+ void set_extent_callback(ExtentCallbackInterface *cb) final {
+ promoter.set_extent_callback(cb);
+ }
+
+ std::size_t get_promotion_size() const final {
+ return promoter.get_promotion_size();
+ }
+
+ bool should_promote() const final {
+ return promoter.should_run_promote();
+ }
+
+ seastar::future<> promote() final {
+ return promoter.promote();
}
~ExtentPinboardLRU() {
ExtentPinboardTwoQ(
std::size_t warm_in_capacity,
std::size_t warm_out_capacity,
- std::size_t hot_capacity)
+ std::size_t hot_capacity,
+ std::size_t promotion_size,
+ ExtentPlacementManager &epm)
: warm_in(warm_in_capacity),
warm_out(warm_out_capacity),
- hot(hot_capacity)
+ hot(hot_capacity),
+ promoter(promotion_size, epm)
{
LOG_PREFIX(ExtentPinboardTwoQ::ExtentPinboardTwoQ);
- INFO("created, warm_in_capacity=0x{:x}B, "
- "warm_out_capacity=0x{:x}B, hot_capacity=0x{:x}B",
- warm_in_capacity, warm_out_capacity, hot_capacity);
+ INFO("created, warm_in_capacity=0x{:x}B, warm_out_capacity=0x{:x}B, "
+ "hot_capacity=0x{:x}B, promotion_size=0x{:x}B",
+ warm_in_capacity, warm_out_capacity, hot_capacity, promotion_size);
}
std::size_t get_capacity_bytes() const {
if (extent.is_linked_to_list()) {
if (s == extent_pin_state_t::WarmIn) {
warm_in.remove(extent);
+ } else if (s == extent_pin_state_t::PendingPromote) {
+ promoter.remove_extent(extent, extent_pin_state_t::Fresh);
} else {
ceph_assert(s == extent_pin_state_t::Hot);
hot.remove(extent);
if (state == extent_pin_state_t::Hot) {
hot.move_to_top(extent, p_src);
hit_queue(overall_hits.hot_hits, p_src, type);
+ } else if (state == extent_pin_state_t::PendingPromote) {
+ promoter.remove_extent(extent, extent_pin_state_t::Hot);
+ auto trimmed_extents = hot.add_to_top(extent, p_src);
+ on_update_hot(trimmed_extents);
+ hit_queue(overall_hits.hot_hits, p_src, type);
} else {
ceph_assert(state == extent_pin_state_t::WarmIn);
hit_queue(overall_hits.warm_in_hits, p_src, type);
auto trimmed_extents = warm_in.increase_cached_size(
extent, increased_length, p_src);
on_update_warm_in(trimmed_extents);
- } else {
- ceph_assert(state == extent_pin_state_t::Hot);
+ } else if (state == extent_pin_state_t::Hot) {
auto trimmed_extents = hot.increase_cached_size(
extent, increased_length, p_src);
on_update_hot(trimmed_extents);
+ } else {
+ ceph_assert(extent.get_pin_state() ==
+ extent_pin_state_t::PendingPromote);
}
}
}
warm_in.clear();
warm_out.clear();
hot.clear();
+ promoter.clear();
+ }
+
+ void set_background_callback(BackgroundListener *listener) final {
+ promoter.set_background_callback(listener);
+ }
+ void set_extent_callback(ExtentCallbackInterface *cb) final {
+ promoter.set_extent_callback(cb);
+ }
+ std::size_t get_promotion_size() const final {
+ return promoter.get_promotion_size();
+ }
+ bool should_promote() const final {
+ return promoter.should_run_promote();
+ }
+ seastar::future<> promote() final {
+ return promoter.promote();
}
~ExtentPinboardTwoQ() {
private:
void on_update_hot(std::list<CachedExtentRef> &extents) {
for (auto extent : extents) {
- extent->set_pin_state(extent_pin_state_t::Fresh);
+ if (promoter.should_promote_extent(*extent)) {
+ promoter.add_extent(*extent);
+ } else {
+ extent->set_pin_state(extent_pin_state_t::Fresh);
+ }
}
}
void on_update_warm_in(std::list<CachedExtentRef> &extents) {
ExtentQueue warm_in;
IndexedFifoQueue warm_out;
ExtentQueue hot;
+ ExtentPromoter promoter;
seastar::metrics::metric_group metrics;
struct QueueCounter {
),
}
);
+ if (promoter.enabled()) {
+ metrics.add_group(
+ "cache",
+ {
+ sm::make_counter(
+ "promoted_size",
+ [this] {
+ return promoter.get_promoted_size();
+ },
+ sm::description("total bytes promoted by the lru")),
+ sm::make_counter(
+ "promoted_count",
+ [this] {
+ return promoter.get_promoted_count();
+ },
+ sm::description("total extents promoted by the lru")),
+ });
+ }
}
-ExtentPinboardRef create_extent_pinboard(std::size_t capacity) {
+ExtentPinboardRef create_extent_pinboard(std::size_t capacity, ExtentPlacementManager *epm) {
using crimson::common::get_conf;
+ size_t promotion_size = get_conf<Option::size_t>("seastore_cache_promotion_size");
auto algorithm = get_conf<std::string>("seastore_cachepin_type");
if (algorithm == "LRU") {
- return std::make_unique<ExtentPinboardLRU>(capacity);
+ return std::make_unique<ExtentPinboardLRU>(capacity, promotion_size, *epm);
} else if (algorithm == "2Q") {
auto warm_in_ratio = get_conf<double>("seastore_cachepin_2q_in_ratio");
auto warm_out_ratio = get_conf<double>("seastore_cachepin_2q_out_ratio");
return std::make_unique<ExtentPinboardTwoQ>(
capacity * warm_in_ratio,
capacity * warm_out_ratio,
- capacity * (1 - warm_in_ratio));
+ capacity * (1 - warm_in_ratio),
+ promotion_size, *epm);
} else {
ceph_abort("invalid seastore_cachepin_type(LRU or 2Q)");
return nullptr;