level: advanced
desc: Begin fast eviction when the used ratio of the main tier reaches this value.
default: 0.7
+- name: seastore_hot_tier_generations
+ type: uint
+ level: advanced
+ desc: The number of generations in the hot tier, or in the whole SeaStore instance if there is only one tier.
+ default: 5
+- name: seastore_cold_tier_generations
+ type: uint
+ level: advanced
+ desc: The number of generations in the cold tier if it exists.
+ default: 3
- name: seastore_data_delta_based_overwrite
type: size
level: dev
ceph_assert(_seq != NULL_SEG_SEQ);
ceph_assert(_type != segment_type_t::NULL_SEG);
ceph_assert(_category != data_category_t::NUM);
- ceph_assert(is_rewrite_generation(_generation));
state = Segment::segment_state_t::OPEN;
seq = _seq;
type = _type;
ceph_assert(_seq != NULL_SEG_SEQ);
ceph_assert(_type != segment_type_t::NULL_SEG);
ceph_assert(_category != data_category_t::NUM);
- ceph_assert(is_rewrite_generation(_generation));
state = Segment::segment_state_t::CLOSED;
seq = _seq;
type = _type;
SegmentManagerGroupRef&& sm_group,
BackrefManager &backref_manager,
SegmentSeqAllocator &segment_seq_allocator,
+ rewrite_gen_t max_rewrite_generation,
bool detailed,
bool is_cold)
: detailed(detailed),
config(config),
sm_group(std::move(sm_group)),
backref_manager(backref_manager),
- ool_segment_seq_allocator(segment_seq_allocator)
+ ool_segment_seq_allocator(segment_seq_allocator),
+ max_rewrite_generation(max_rewrite_generation)
{
config.validate();
}
auto& segment_info = it->second;
if (segment_info.is_empty()) {
auto old_usage = calc_utilization(seg_id);
+ ceph_assert(is_rewrite_generation(generation, max_rewrite_generation));
segments.mark_open(seg_id, seq, type, category, generation);
if (type == segment_type_t::JOURNAL) {
assert(trimmer != nullptr);
space_tracker->calc_utilization(seg_id),
sea_time_point_printer_t{segments.get_time_bound()});
ceph_assert(segment_info.is_closed());
+ ceph_assert(is_rewrite_generation(
+ segment_info.generation, max_rewrite_generation));
reclaim_state = reclaim_state_t::create(
seg_id, segment_info.generation, segments.get_segment_size());
+ assert(is_target_rewrite_generation(
+ reclaim_state->target_generation, max_rewrite_generation));
}
reclaim_state->advance(config.reclaim_bytes_per_cycle);
SegmentManagerGroupRef&& sm_group,
BackrefManager &backref_manager,
SegmentSeqAllocator &segment_seq_allocator,
+ rewrite_gen_t max_rewrite_generation,
bool detailed,
bool is_cold);
SegmentManagerGroupRef&& sm_group,
BackrefManager &backref_manager,
SegmentSeqAllocator &ool_seq_allocator,
+ rewrite_gen_t max_rewrite_generation,
bool detailed,
bool is_cold = false) {
return std::make_unique<SegmentCleaner>(
config, std::move(sm_group), backref_manager,
- ool_seq_allocator, detailed, is_cold);
+ ool_seq_allocator, max_rewrite_generation,
+ detailed, is_cold);
}
/*
segment_id_t segment_id,
rewrite_gen_t generation,
segment_off_t segment_size) {
- ceph_assert(is_rewrite_generation(generation));
rewrite_gen_t target_gen;
if (generation < MIN_REWRITE_GENERATION) {
target_gen = generation + 1;
}
- assert(is_target_rewrite_generation(target_gen));
return {generation,
target_gen,
segment_size,
ceph_assert(s_type == segment_type_t::OOL ||
trimmer != nullptr); // segment_type_t::JOURNAL
auto old_usage = calc_utilization(segment);
+ ceph_assert(is_rewrite_generation(generation, max_rewrite_generation));
segments.init_closed(segment, seq, s_type, category, generation);
auto new_usage = calc_utilization(segment);
adjust_segment_util(old_usage, new_usage);
// TODO: drop once paddr->journal_seq_t is introduced
SegmentSeqAllocator &ool_segment_seq_allocator;
+ const rewrite_gen_t max_rewrite_generation = NULL_GENERATION;
};
class RBMCleaner;
std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
}
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result->bp));
+ assert(is_rewrite_generation(
+ result->gen,
+ epm.dynamic_max_rewrite_generation));
ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING,
result->paddr,
hint,
std::vector<TCachedExtentRef<T>> extents;
for (auto &result : results) {
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));
+ assert(is_rewrite_generation(
+ result.gen,
+ epm.dynamic_max_rewrite_generation));
ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING,
result.paddr,
hint,
placement_hint_t hint,
rewrite_gen_t gen,
transaction_id_t trans_id) {
- assert(gen == NULL_GENERATION || is_rewrite_generation(gen));
state = _state;
set_paddr(paddr);
user_hint = hint;
/// assign the target rewrite generation for the followup rewrite
void set_target_rewrite_generation(rewrite_gen_t gen) {
- assert(is_target_rewrite_generation(gen));
-
+ // NOTE(review): the validity assert is dropped here (not converted to the
+ // two-argument form) because the maximum generation is now a dynamic,
+ // per-EPM value that is not visible from this extent-level setter; the
+ // EPM call sites assert against dynamic_max_rewrite_generation instead —
+ // confirm all callers do so.
user_hint = placement_hint_t::REWRITE;
rewrite_generation = gen;
}
{
writer_refs.clear();
auto cold_segment_cleaner = dynamic_cast<SegmentCleaner*>(cold_cleaner.get());
- dynamic_max_rewrite_generation = MIN_COLD_GENERATION - 1;
+ dynamic_max_rewrite_generation = hot_tier_generations - 1;
if (cold_segment_cleaner) {
- dynamic_max_rewrite_generation = MAX_REWRITE_GENERATION;
+ dynamic_max_rewrite_generation = hot_tier_generations + cold_tier_generations - 1;
}
+ ceph_assert(dynamic_max_rewrite_generation > MIN_REWRITE_GENERATION);
if (trimmer->get_backend_type() == backend_type_t::SEGMENTED) {
auto segment_cleaner = dynamic_cast<SegmentCleaner*>(cleaner.get());
auto num_writers = generation_to_writer(dynamic_max_rewrite_generation + 1);
data_writers_by_gen.resize(num_writers, nullptr);
- for (rewrite_gen_t gen = OOL_GENERATION; gen < MIN_COLD_GENERATION; ++gen) {
+ for (rewrite_gen_t gen = OOL_GENERATION; gen < hot_tier_generations; ++gen) {
writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
data_category_t::DATA, gen, *segment_cleaner,
*ool_segment_seq_allocator));
}
md_writers_by_gen.resize(num_writers, {});
- for (rewrite_gen_t gen = OOL_GENERATION; gen < MIN_COLD_GENERATION; ++gen) {
+ for (rewrite_gen_t gen = OOL_GENERATION; gen < hot_tier_generations; ++gen) {
writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
data_category_t::METADATA, gen, *segment_cleaner,
*ool_segment_seq_allocator));
}
if (cold_segment_cleaner) {
- for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+ for (rewrite_gen_t gen = hot_tier_generations; gen <= dynamic_max_rewrite_generation; ++gen) {
writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
data_category_t::DATA, gen, *cold_segment_cleaner,
*ool_segment_seq_allocator));
data_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get();
}
- for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+ for (rewrite_gen_t gen = hot_tier_generations; gen <= dynamic_max_rewrite_generation; ++gen) {
writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
data_category_t::METADATA, gen, *cold_segment_cleaner,
*ool_segment_seq_allocator));
background_process.init(std::move(trimmer),
std::move(cleaner),
- std::move(cold_cleaner));
+ std::move(cold_cleaner),
+ hot_tier_generations);
if (cold_segment_cleaner) {
ceph_assert(get_main_backend_type() == backend_type_t::SEGMENTED);
ceph_assert(background_process.has_cold_tier());
main_stats.add(main_writer_stats.back());
// 2. mainmdat
main_writer_stats.emplace_back();
- for (rewrite_gen_t gen = MIN_REWRITE_GENERATION; gen < MIN_COLD_GENERATION; ++gen) {
+ for (rewrite_gen_t gen = MIN_REWRITE_GENERATION; gen < hot_tier_generations; ++gen) {
const auto &writer = get_writer(METADATA, gen);
ceph_assert(writer->get_type() == backend_type_t::SEGMENTED);
main_writer_stats.back().add(writer->get_stats());
main_stats.add(main_writer_stats.back());
// 3. maindata
main_writer_stats.emplace_back();
- for (rewrite_gen_t gen = MIN_REWRITE_GENERATION; gen < MIN_COLD_GENERATION; ++gen) {
+ for (rewrite_gen_t gen = MIN_REWRITE_GENERATION; gen < hot_tier_generations; ++gen) {
const auto &writer = get_writer(DATA, gen);
ceph_assert(writer->get_type() == backend_type_t::SEGMENTED);
main_writer_stats.back().add(writer->get_stats());
if (has_cold_tier) {
// 0. coldmdat
cold_writer_stats.emplace_back();
- for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+ for (rewrite_gen_t gen = hot_tier_generations;
+ gen <= dynamic_max_rewrite_generation;
+ ++gen) {
const auto &writer = get_writer(METADATA, gen);
ceph_assert(writer->get_type() == backend_type_t::SEGMENTED);
cold_writer_stats.back().add(writer->get_stats());
cold_stats.add(cold_writer_stats.back());
// 1. colddata
cold_writer_stats.emplace_back();
- for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+ for (rewrite_gen_t gen = hot_tier_generations;
+ gen <= dynamic_max_rewrite_generation;
+ ++gen) {
const auto &writer = get_writer(DATA, gen);
ceph_assert(writer->get_type() == backend_type_t::SEGMENTED);
cold_writer_stats.back().add(writer->get_stats());
res.usage.cleaner_usage.main_usage += extent->get_length();
t.mark_delayed_extent_inline(extent);
} else {
- if (extent->get_rewrite_generation() < MIN_COLD_GENERATION) {
+ if (extent->get_rewrite_generation() < hot_tier_generations) {
res.usage.cleaner_usage.main_usage += extent->get_length();
} else {
assert(background_process.has_cold_tier());
namespace crimson::os::seastore {
+class Cache;
+
/**
* ExtentOolWriter
*
class ExtentPlacementManager {
public:
- ExtentPlacementManager()
- : ool_segment_seq_allocator(
+ ExtentPlacementManager(
+ rewrite_gen_t hot_tier_generations,
+ rewrite_gen_t cold_tier_generations)
+ : hot_tier_generations(hot_tier_generations),
+ cold_tier_generations(cold_tier_generations),
+ dynamic_max_rewrite_generation(cold_tier_generations),
+ ool_segment_seq_allocator(
std::make_unique<SegmentSeqAllocator>(segment_type_t::OOL)),
max_data_allocation_size(crimson::common::get_conf<Option::size_t>(
"seastore_max_data_allocation_size"))
{
+ LOG_PREFIX(ExtentPlacementManager::ExtentPlacementManager);
devices_by_id.resize(DEVICE_ID_MAX, nullptr);
+ SUBINFO(seastore_epm, "cold_tier_generations={}, hot_tier_generations={}",
+ cold_tier_generations, hot_tier_generations);
}
void init(JournalTrimmerImplRef &&, AsyncCleanerRef &&, AsyncCleanerRef &&);
#endif
) {
assert(hint < placement_hint_t::NUM_HINTS);
- assert(is_target_rewrite_generation(gen));
+ assert(is_target_rewrite_generation(gen, dynamic_max_rewrite_generation));
assert(gen == INIT_GENERATION || hint == placement_hint_t::REWRITE);
data_category_t category = get_extent_category(type);
) {
LOG_PREFIX(ExtentPlacementManager::alloc_new_data_extents);
assert(hint < placement_hint_t::NUM_HINTS);
- assert(is_target_rewrite_generation(gen));
+ assert(is_target_rewrite_generation(gen, dynamic_max_rewrite_generation));
assert(gen == INIT_GENERATION || hint == placement_hint_t::REWRITE);
data_category_t category = get_extent_category(type);
} else if (hint == placement_hint_t::COLD) {
assert(gen == INIT_GENERATION);
if (background_process.has_cold_tier()) {
- gen = MIN_COLD_GENERATION;
+ gen = hot_tier_generations;
} else {
gen = MIN_REWRITE_GENERATION;
}
ExtentOolWriter* get_writer(data_category_t category,
rewrite_gen_t gen) {
- assert(is_rewrite_generation(gen));
+ assert(is_rewrite_generation(gen, dynamic_max_rewrite_generation));
assert(gen != INLINE_GENERATION);
assert(gen <= dynamic_max_rewrite_generation);
ExtentOolWriter* ret = nullptr;
const ExtentOolWriter* get_writer(data_category_t category,
rewrite_gen_t gen) const {
- assert(is_rewrite_generation(gen));
+ assert(is_rewrite_generation(gen, dynamic_max_rewrite_generation));
assert(gen != INLINE_GENERATION);
assert(gen <= dynamic_max_rewrite_generation);
ExtentOolWriter* ret = nullptr;
void init(JournalTrimmerImplRef &&_trimmer,
AsyncCleanerRef &&_cleaner,
- AsyncCleanerRef &&_cold_cleaner) {
+ AsyncCleanerRef &&_cold_cleaner,
+ rewrite_gen_t hot_tier_generations) {
trimmer = std::move(_trimmer);
trimmer->set_background_callback(this);
main_cleaner = std::move(_cleaner);
crimson::common::get_conf<double>(
"seastore_multiple_tiers_default_evict_ratio"),
crimson::common::get_conf<double>(
- "seastore_multiple_tiers_fast_evict_ratio"));
+ "seastore_multiple_tiers_fast_evict_ratio"),
+ hot_tier_generations);
}
}
double stop_evict_ratio;
double default_evict_ratio;
double fast_evict_ratio;
+ rewrite_gen_t hot_tier_generations;
void init(double stop_ratio,
double default_ratio,
- double fast_ratio) {
+ double fast_ratio,
+ rewrite_gen_t hot_tier_generations) {
+ // Eviction thresholds must be strictly ordered:
+ // 0 <= stop_ratio < default_ratio < fast_ratio.
ceph_assert(0 <= stop_ratio);
ceph_assert(stop_ratio < default_ratio);
ceph_assert(default_ratio < fast_ratio);
stop_evict_ratio = stop_ratio;
default_evict_ratio = default_ratio;
fast_evict_ratio = fast_ratio;
+ // Cache the hot-tier generation count so adjust_generation() can decide
+ // the hot/cold boundary without reaching back into the EPM.
this->hot_tier_generations = hot_tier_generations;
}
bool is_stop_mode() const {
rewrite_gen_t ret = gen;
switch(eviction_mode) {
case eviction_mode_t::STOP:
- if (gen == MIN_COLD_GENERATION) {
- ret = MIN_COLD_GENERATION - 1;
+ if (gen == hot_tier_generations) {
+ ret = hot_tier_generations - 1;
}
break;
case eviction_mode_t::DEFAULT:
break;
case eviction_mode_t::FAST:
- if (gen >= MIN_REWRITE_GENERATION && gen < MIN_COLD_GENERATION) {
- ret = MIN_COLD_GENERATION;
+ if (gen >= MIN_REWRITE_GENERATION && gen < hot_tier_generations) {
+ ret = hot_tier_generations;
}
break;
default:
Device* primary_device = nullptr;
std::size_t num_devices = 0;
- rewrite_gen_t dynamic_max_rewrite_generation = REWRITE_GENERATIONS;
+ // without cold tier, the largest generation is less than hot_tier_generations
+ const rewrite_gen_t hot_tier_generations = NULL_GENERATION;
+ const rewrite_gen_t cold_tier_generations = NULL_GENERATION;
+ rewrite_gen_t dynamic_max_rewrite_generation = NULL_GENERATION;
BackgroundProcess background_process;
// TODO: drop once paddr->journal_seq_t is introduced
SegmentSeqAllocatorRef ool_segment_seq_allocator;
extent_len_t max_data_allocation_size = 0;
friend class ::transaction_manager_test_t;
+ friend class Cache;
};
using ExtentPlacementManagerRef = std::unique_ptr<ExtentPlacementManager>;
return out << "GEN_INL";
} else if (gen.gen == OOL_GENERATION) {
return out << "GEN_OOL";
- } else if (gen.gen > REWRITE_GENERATIONS) {
- return out << "GEN_INVALID(" << (unsigned)gen.gen << ")!";
} else {
return out << "GEN(" << (unsigned)gen.gen << ")";
}
// All the rewritten extents start with MIN_REWRITE_GENERATION
constexpr rewrite_gen_t MIN_REWRITE_GENERATION = 3;
-// without cold tier, the largest generation is less than MIN_COLD_GENERATION
-constexpr rewrite_gen_t MIN_COLD_GENERATION = 5;
-constexpr rewrite_gen_t MAX_REWRITE_GENERATION = 7;
-constexpr rewrite_gen_t REWRITE_GENERATIONS = MAX_REWRITE_GENERATION + 1;
constexpr rewrite_gen_t NULL_GENERATION =
std::numeric_limits<rewrite_gen_t>::max();
}
// before EPM decision
-constexpr bool is_target_rewrite_generation(rewrite_gen_t gen) {
+// A target generation is valid if it is INIT_GENERATION or lies in
+// [MIN_REWRITE_GENERATION, max_gen + 1]; max_gen is the dynamic maximum
+// rewrite generation of the owning EPM/cleaner.
+constexpr bool is_target_rewrite_generation(
+  rewrite_gen_t gen,
+  rewrite_gen_t max_gen) {
  return gen == INIT_GENERATION ||
         (gen >= MIN_REWRITE_GENERATION &&
+          // max_gen + 1 mirrors the pre-change bound of REWRITE_GENERATIONS
+          // (== MAX_REWRITE_GENERATION + 1): a target may point one past the
+          // last generation before the EPM clamps it — TODO confirm clamping
+          // at the EPM call sites.
-          gen <= REWRITE_GENERATIONS);
+          gen <= max_gen + 1);
}
// after EPM decision
-constexpr bool is_rewrite_generation(rewrite_gen_t gen) {
+// Once assigned by the EPM, a generation is valid iff it lies in
+// [INLINE_GENERATION, max_gen]; max_gen replaces the former compile-time
+// REWRITE_GENERATIONS constant with the instance's dynamic maximum.
+constexpr bool is_rewrite_generation(
+  rewrite_gen_t gen,
+  rewrite_gen_t max_gen) {
  return gen >= INLINE_GENERATION &&
-         gen < REWRITE_GENERATIONS;
+         gen <= max_gen;
}
enum class data_category_t : uint8_t {
shard_stats_t& shard_stats,
bool is_test)
{
- auto epm = std::make_unique<ExtentPlacementManager>();
+ rewrite_gen_t hot_tier_generations = crimson::common::get_conf<uint64_t>(
+ "seastore_hot_tier_generations");
+ rewrite_gen_t cold_tier_generations = crimson::common::get_conf<uint64_t>(
+ "seastore_cold_tier_generations");
+ auto epm = std::make_unique<ExtentPlacementManager>(
+ hot_tier_generations, cold_tier_generations);
auto cache = std::make_unique<Cache>(*epm);
auto lba_manager = lba::create_lba_manager(*cache);
auto sms = std::make_unique<SegmentManagerGroup>();
std::move(cold_sms),
*backref_manager,
epm->get_ool_segment_seq_allocator(),
+ hot_tier_generations + cold_tier_generations - 1,
cleaner_is_detailed,
/* is_cold = */ true);
if (backend_type == backend_type_t::SEGMENTED) {
std::move(sms),
*backref_manager,
epm->get_ool_segment_seq_allocator(),
+ hot_tier_generations - 1,
cleaner_is_detailed);
auto segment_cleaner = static_cast<SegmentCleaner*>(cleaner.get());
for (auto id : segment_cleaner->get_device_ids()) {
}).safe_then([this] {
sms.reset(new SegmentManagerGroup());
journal = journal::make_segmented(*this, *this);
- epm.reset(new ExtentPlacementManager());
+ rewrite_gen_t hot_tier_generations = crimson::common::get_conf<uint64_t>(
+ "seastore_hot_tier_generations");
+ rewrite_gen_t cold_tier_generations = crimson::common::get_conf<uint64_t>(
+ "seastore_cold_tier_generations");
+ epm.reset(new ExtentPlacementManager(
+ hot_tier_generations, cold_tier_generations));
cache.reset(new Cache(*epm));
block_size = segment_manager->get_block_size();
return segment_manager->mkfs(
segment_manager::get_ephemeral_device_config(0, 1, 0));
}).safe_then([this] {
- epm.reset(new ExtentPlacementManager());
+ rewrite_gen_t hot_tier_generations = crimson::common::get_conf<uint64_t>(
+ "seastore_hot_tier_generations");
+ rewrite_gen_t cold_tier_generations = crimson::common::get_conf<uint64_t>(
+ "seastore_cold_tier_generations");
+ epm.reset(new ExtentPlacementManager(
+ hot_tier_generations, cold_tier_generations));
cache.reset(new Cache(*epm));
current = paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0);
epm->test_init_no_background(segment_manager.get());
epm->background_process
.eviction_state
- .init(stop_ratio, default_ratio, fast_ratio);
+ .init(stop_ratio, default_ratio, fast_ratio, epm->hot_tier_generations);
// these variables are described in
// EPM::BackgroundProcess::eviction_state_t::maybe_update_eviction_mode
assert(all_extent_types.size() == EXTENT_TYPES_MAX - 4);
std::vector<rewrite_gen_t> all_generations;
- for (auto i = INIT_GENERATION; i < REWRITE_GENERATIONS; i++) {
+ for (auto i = INIT_GENERATION; i <= epm->dynamic_max_rewrite_generation; i++) {
all_generations.push_back(i);
}
expected_generations[t][INIT_GENERATION] = OOL_GENERATION;
}
- for (auto i = INIT_GENERATION + 1; i < REWRITE_GENERATIONS; i++) {
+ for (auto i = INIT_GENERATION + 1; i <= epm->dynamic_max_rewrite_generation; i++) {
expected_generations[t][i] = i;
}
}
if (is_root_type(t) || is_lba_backref_node(t)) {
continue;
}
- for (auto i = INIT_GENERATION + 1; i < REWRITE_GENERATIONS; i++) {
+ for (auto i = INIT_GENERATION + 1; i <= epm->dynamic_max_rewrite_generation; i++) {
expected_generations[t][i] = func(i);
}
}
};
// verify that no data should go to the cold tier
- update_data_gen_mapping([](rewrite_gen_t gen) -> rewrite_gen_t {
- if (gen == MIN_COLD_GENERATION) {
- return MIN_COLD_GENERATION - 1;
+ update_data_gen_mapping([this](rewrite_gen_t gen) -> rewrite_gen_t {
+ if (gen == epm->hot_tier_generations) {
+ return epm->hot_tier_generations - 1;
} else {
return gen;
}
// verify that data must go to the cold tier
run_until(ratio_D_size).get();
- update_data_gen_mapping([](rewrite_gen_t gen) {
- if (gen >= MIN_REWRITE_GENERATION && gen < MIN_COLD_GENERATION) {
- return MIN_COLD_GENERATION;
+ update_data_gen_mapping([this](rewrite_gen_t gen) {
+ if (gen >= MIN_REWRITE_GENERATION && gen < epm->hot_tier_generations) {
+ return epm->hot_tier_generations;
} else {
return gen;
}