level: advanced
desc: Size in bytes of extents to be demoted from logical bucket
default: 2_M
+- name: seastore_write_through_size
+ type: size
+ level: dev
+ desc: Select the write-through policy when the data length is greater than or equal to this value.
+ default: 512_K
case extent_types_t::OBJECT_DATA_BLOCK:
{
auto extents = alloc_new_data_extents<
- ObjectDataBlock>(t, length, {hint, gen, is_tracked});
+ ObjectDataBlock>(t, length, {hint, gen, is_tracked,
+ epm.get_write_policy(type, length)});
res.insert(res.begin(), extents.begin(), extents.end());
}
return res;
case extent_types_t::TEST_BLOCK:
{
auto extents = alloc_new_data_extents<
- TestBlock>(t, length, {hint, gen, is_tracked});
+ TestBlock>(t, length, {hint, gen, is_tracked,
+ epm.get_write_policy(type, length)});
res.insert(res.begin(), extents.begin(), extents.end());
}
return res;
P_ADDR_ROOT,
PLACEMENT_HINT_NULL,
NULL_GENERATION,
- TRANS_ID_NULL);
+ TRANS_ID_NULL,
+ write_policy_t::WRITE_BACK);
root->set_modify_time(seastar::lowres_system_clock::now());
INFO("init root -- {}", *root);
add_extent(root);
offset,
PLACEMENT_HINT_NULL,
NULL_GENERATION,
- TRANS_ID_NULL);
+ TRANS_ID_NULL,
+ write_policy_t::WRITE_BACK);
DEBUGT("{} length=0x{:x} is absent, add extent ... -- {}",
t, type, length, *ret);
add_extent(ret);
offset,
PLACEMENT_HINT_NULL,
NULL_GENERATION,
- TRANS_ID_NULL);
+ TRANS_ID_NULL,
+ write_policy_t::WRITE_BACK);
SUBDEBUGT(seastore_cache,
"{} {}~0x{:x} is absent, add extent and reading range 0x{:x}~0x{:x} ... -- {}",
t, T::TYPE, offset, length, partial_off, partial_len, *ret);
offset,
PLACEMENT_HINT_NULL,
NULL_GENERATION,
- TRANS_ID_NULL);
+ TRANS_ID_NULL,
+ write_policy_t::WRITE_BACK);
SUBDEBUG(seastore_cache,
"{} {}~0x{:x} is absent, add extent and reading range 0x{:x}~0x{:x} ... -- {}",
T::TYPE, offset, length, partial_off, partial_len, *ret);
result->paddr,
opt.hint,
result->gen,
- t.get_trans_id());
+ t.get_trans_id(),
+ write_policy_t::WRITE_BACK);
t.add_fresh_extent(ret);
SUBDEBUGT(seastore_cache,
"allocated {} 0x{:x}B extent at {}, hint={}, gen={} -- {}",
result.paddr,
opt.hint,
result.gen,
- t.get_trans_id());
+ t.get_trans_id(),
+ opt.write_policy);
t.add_fresh_extent(ret);
SUBDEBUGT(seastore_cache,
"allocated {} 0x{:x}B extent at {}, hint={}, gen={} -- {}",
remap_paddr,
PLACEMENT_HINT_NULL,
NULL_GENERATION,
- t.get_trans_id());
+ t.get_trans_id(),
+ write_policy_t::WRITE_BACK);
auto extent = ext->template cast<T>();
extent->set_laddr(remap_laddr);
paddr_t paddr,
placement_hint_t hint,
rewrite_gen_t gen,
- transaction_id_t trans_id) {
+ transaction_id_t trans_id,
+ write_policy_t policy) {
state = _state;
set_paddr(paddr);
user_hint = hint;
rewrite_generation = gen;
pending_for_transaction = trans_id;
+ write_policy = policy;
}
void set_modify_time(sea_time_point t) {
<< ", last_committed_crc=" << last_committed_crc
<< ", refcount=" << use_count()
<< ", user_hint=" << user_hint
+ << ", write_policy=" << write_policy
<< ", rewrite_gen=" << rewrite_gen_printer_t{rewrite_generation}
<< ", pending_io=";
if (is_pending_io()) {
is_shadow = b;
}
+ write_policy_t get_write_policy() const { // read-only accessor: returns the policy currently held in write_policy
+ return write_policy;
+ }
+
+ void set_write_policy(write_policy_t w) { // replaces the stored policy with w
+ write_policy = w;
+ }
+
+ void reset_write_policy() { // puts the policy back to WRITE_BACK, the member's declared default
+ write_policy = write_policy_t::WRITE_BACK;
+ }
+
private:
template <typename T>
friend class read_set_item_t;
placement_hint_t user_hint = PLACEMENT_HINT_NULL;
+ write_policy_t write_policy = write_policy_t::WRITE_BACK;
+
// the target rewrite generation for the followup rewrite
// or the rewrite generation for the fresh write
rewrite_gen_t rewrite_generation = NULL_GENERATION;
ool_segment_seq_allocator(
std::make_unique<SegmentSeqAllocator>(segment_type_t::OOL)),
max_data_allocation_size(crimson::common::get_conf<Option::size_t>(
- "seastore_max_data_allocation_size"))
+ "seastore_max_data_allocation_size")),
+ write_through_size(crimson::common::get_conf<Option::size_t>(
+ "seastore_write_through_size"))
{
LOG_PREFIX(ExtentPlacementManager::ExtentPlacementManager);
devices_by_id.resize(DEVICE_ID_MAX, nullptr);
placement_hint_t hint;
rewrite_gen_t gen;
bool is_tracked;
+ write_policy_t write_policy = write_policy_t::WRITE_BACK;
#ifdef UNIT_TESTS_BUILT
std::optional<paddr_t> external_paddr = std::nullopt;
#endif
assert(opt.gen == INIT_GENERATION || opt.hint == placement_hint_t::REWRITE);
data_category_t category = get_extent_category(type);
- opt.gen = adjust_generation(category, type, opt.hint, opt.gen, opt.is_tracked);
+ opt.gen = adjust_generation(
+ category, type, opt.hint, opt.gen, opt.write_policy, opt.is_tracked);
paddr_t addr;
#ifdef UNIT_TESTS_BUILT
assert(opt.gen == INIT_GENERATION || opt.hint == placement_hint_t::REWRITE);
data_category_t category = get_extent_category(type);
- opt.gen = adjust_generation(category, type, opt.hint, opt.gen, opt.is_tracked);
+ opt.gen = adjust_generation(
+ category, type, opt.hint, opt.gen, opt.write_policy, opt.is_tracked);
assert(opt.gen != INLINE_GENERATION);
// XXX: bp might be extended to point to different memory (e.g. PMem)
return allocs;
}
+ /// Picks the write policy for a fresh extent of the given type and length.
+ ///
+ /// WRITE_THROUGH is returned only when all three conditions hold: a cold
+ /// tier is present, length is at least write_through_size, and type is a
+ /// data type. Every other combination yields WRITE_BACK.
+ write_policy_t get_write_policy(extent_types_t type, extent_len_t length) const {
+   // Operand order is kept as-is so short-circuit evaluation is unchanged.
+   const bool use_write_through =
+     has_cold_tier() && length >= write_through_size && is_data_type(type);
+   return use_write_through
+     ? write_policy_t::WRITE_THROUGH
+     : write_policy_t::WRITE_BACK;
+ }
+
#ifdef UNIT_TESTS_BUILT
void prefill_fragmented_devices() {
LOG_PREFIX(ExtentPlacementManager::prefill_fragmented_devices);
extent_types_t type,
placement_hint_t hint,
rewrite_gen_t gen,
+ write_policy_t policy,
bool is_tracked) {
assert(is_real_type(type));
if (is_root_type(type)) {
}
} else {
assert(category == data_category_t::DATA);
- gen = OOL_GENERATION;
+ if (background_process.has_cold_tier() &&
+ policy == write_policy_t::WRITE_THROUGH) {
+ gen = hot_tier_generations;
+ } else {
+ assert(policy != write_policy_t::WRITE_THROUGH);
+ gen = OOL_GENERATION;
+ }
}
} else if (background_process.has_cold_tier()) {
gen = background_process.adjust_generation(gen);
+ if (gen <= hot_tier_generations &&
+ policy == write_policy_t::WRITE_THROUGH) {
+ gen = hot_tier_generations;
+ }
}
if (is_tracked && gen >= hot_tier_generations &&
// TODO: drop once paddr->journal_seq_t is introduced
SegmentSeqAllocatorRef ool_segment_seq_allocator;
extent_len_t max_data_allocation_size = 0;
+ std::size_t write_through_size;
friend class ::transaction_manager_test_t;
friend class Cache;
}
}
+/**
+ * Streams a human-readable name for a write_policy_t value.
+ *
+ * @param out  stream to write to
+ * @param w    policy to print
+ * @return     out, to allow chaining
+ *
+ * The two known enumerators print as their identifiers. Any other value
+ * (for example a corrupted or mis-cast policy) prints as "UNKNOWN(<n>)".
+ * Without that fallback the function could flow off its end without
+ * returning, which is undefined behavior for a non-void function.
+ */
+std::ostream& operator<<(std::ostream& out, write_policy_t w)
+{
+  // Deliberately no `default:` label, so the compiler can still warn when a
+  // newly added enumerator is not handled here.
+  switch (w) {
+  case write_policy_t::WRITE_BACK:
+    return out << "WRITE_BACK";
+  case write_policy_t::WRITE_THROUGH:
+    return out << "WRITE_THROUGH";
+  }
+  // Reached only for values outside the declared enumerators.
+  return out << "UNKNOWN(" << static_cast<int>(w) << ")";
+}
+
bool can_delay_allocation(device_type_t type) {
// Some types of device may not support delayed allocation, for example PMEM.
// All types of device currently support delayed allocation.
std::ostream &operator<<(std::ostream &out, extent_types_t t);
+enum class write_policy_t { // policy tag carried by extents and allocation options
+ WRITE_BACK, // default wherever no policy is explicitly chosen
+ WRITE_THROUGH // picked by get_write_policy(type, length) for data types of at least write_through_size when a cold tier exists
+};
+
+std::ostream& operator<<(std::ostream& out, write_policy_t w);
+
/**
* rewrite_gen_t
*
template <> struct fmt::formatter<crimson::os::seastore::write_result_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::omap_type_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<ceph::buffer::list> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::write_policy_t> : fmt::ostream_formatter {};
#endif
template <>
SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...",
t, T::TYPE, laddr_hint, len, placement_hint);
auto exts = cache->alloc_new_data_extents<T>(
- t, len, {placement_hint, INIT_GENERATION});
+ t, len,
+ {
+ placement_hint, INIT_GENERATION, false,
+ epm->get_write_policy(T::TYPE, len)
+ });
// user must initialize the logical extent themselves
assert(is_user_transaction(t.get_src()));
for (auto& ext : exts) {
check.emplace(addr, get_map_val(len, TestBlock::TYPE));
lba_btree_update([=, this](auto &btree, auto &t) {
auto extents = cache->alloc_new_data_extents<TestBlock>(
- t, TestBlock::SIZE, {placement_hint_t::HOT, 0, false, get_paddr()});
+ t, TestBlock::SIZE,
+ {placement_hint_t::HOT, 0, false,
+ write_policy_t::WRITE_BACK, get_paddr()});
return seastar::do_with(
std::move(extents),
[this, addr, &t, len, &btree](auto &extents) {
*t.t,
[=, this](auto &t) {
auto extents = cache->alloc_new_data_extents<TestBlock>(
- t, TestBlock::SIZE, {placement_hint_t::HOT, 0, false, get_paddr()});
+ t, TestBlock::SIZE,
+ {placement_hint_t::HOT, 0, false,
+ write_policy_t::WRITE_BACK, get_paddr()});
return seastar::do_with(
std::vector<LogicalChildNodeRef>(
extents.begin(), extents.end()),
t,
placement_hint_t::HOT,
gen,
+ write_policy_t::WRITE_BACK,
false);
if (expected_generations[t][gen] != epm_gen) {
logger().error("caller: {}, extent type: {}, input generation: {}, "