&reclaimed, &t, modify_time, target_generation] {
DEBUGT("reclaim {} extents", t, extents.size());
// rewrite live extents
- return trans_intr::do_for_each(
- extents,
- [&extent_callback, modify_time, &t,
- &reclaimed, target_generation](auto ext)
- {
- reclaimed += ext->get_length();
- return extent_callback.rewrite_extent(
- t, ext, target_generation, modify_time);
- });
+ for (auto &ext : extents) {
+ reclaimed += ext->get_length();
+ }
+ return extent_callback.rewrite_extents(
+ t, extents, target_generation, modify_time);
});
}).si_then([&extent_callback, &t] {
return extent_callback.submit_transaction_direct(t);
rewrite_gen_t target_generation,
sea_time_point modify_time) = 0;
+ using rewrite_extents_iertr = base_iertr;
+ using rewrite_extents_ret = rewrite_extents_iertr::future<>; // future<> : resolves without a value
+ virtual rewrite_extents_ret rewrite_extents( // pure virtual: takes the whole extent list in a single call
+ Transaction &t,
+ std::vector<CachedExtentRef> &extents, // non-const reference to the caller's vector
+ rewrite_gen_t target_generation,
+ sea_time_point modify_time) = 0;
+
/**
* promote_extent
*
return paddr;
}
- std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length) {
+ std::list<alloc_paddr_result> alloc_paddrs(
+ extent_len_t length, paddr_t hint) {
// TODO: implement allocation strategy (dirty metadata and multiple devices)
auto rbs = rb_group->get_rb_managers();
- auto ret = rbs[0]->alloc_extents(length);
+ auto ret = rbs[0]->alloc_extents(length, hint);
if (!ret.empty()) {
stats.used_bytes += length;
}
extent_len_t length, ///< [in] length
placement_hint_t hint, ///< [in] user hint
rewrite_gen_t gen, ///< [in] rewrite generation
+ paddr_t paddr_hint,
bool is_tracked
)
{
SUBDEBUGT(seastore_cache, "allocate {} 0x{:x}B, hint={}, gen={}",
t, type, length, hint, rewrite_gen_printer_t{gen});
ceph_assert(get_extent_category(type) == data_category_t::METADATA);
- auto opt = alloc_option_t{hint, gen, is_tracked};
+ auto opt = alloc_option_t{hint, gen, is_tracked, paddr_hint};
switch (type) {
case extent_types_t::ROOT:
ceph_assert(0 == "ROOT is never directly alloc'd");
extent_len_t length, ///< [in] length
placement_hint_t hint, ///< [in] user hint
rewrite_gen_t gen, ///< [in] rewrite generation
+ paddr_t paddr_hint,
bool is_tracked
)
{
case extent_types_t::OBJECT_DATA_BLOCK:
{
auto extents = alloc_new_data_extents<
- ObjectDataBlock>(t, length, {hint, gen, is_tracked,
+ ObjectDataBlock>(t, length, {hint, gen, is_tracked, paddr_hint,
epm.get_write_policy(type, length)});
res.insert(res.begin(), extents.begin(), extents.end());
}
case extent_types_t::TEST_BLOCK:
{
auto extents = alloc_new_data_extents<
- TestBlock>(t, length, {hint, gen, is_tracked,
+ TestBlock>(t, length, {hint, gen, is_tracked, paddr_hint,
epm.get_write_policy(type, length)});
res.insert(res.begin(), extents.begin(), extents.end());
}
extent_len_t length, ///< [in] length
placement_hint_t hint, ///< [in] user hint
rewrite_gen_t gen, ///< [in] rewrite generation
+ paddr_t paddr_hint,
bool is_tracked
);
extent_len_t length, ///< [in] length
placement_hint_t hint, ///< [in] user hint
rewrite_gen_t gen, ///< [in] rewrite generation
+ paddr_t paddr_hint,
bool is_tracked
);
virtual paddr_t alloc_paddr(extent_len_t length) = 0;
- virtual std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length) = 0;
+ virtual std::list<alloc_paddr_result> alloc_paddrs(
+ extent_len_t length,
+ paddr_t hint) = 0;
using alloc_write_ertr = base_ertr;
using alloc_write_iertr = trans_iertr<alloc_write_ertr>;
return make_delayed_temp_paddr(0);
}
- std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length) final {
+ std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length, paddr_t) final { // hint parameter is unnamed and unused: one delayed temp paddr covering `length` is always returned
return {alloc_paddr_result{make_delayed_temp_paddr(0), length}};
}
return rb_cleaner->alloc_paddr(length);
}
- std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length) final {
+ std::list<alloc_paddr_result> alloc_paddrs(
+ extent_len_t length, paddr_t hint) final {
assert(rb_cleaner);
- return rb_cleaner->alloc_paddrs(length);
+ return rb_cleaner->alloc_paddrs(length, hint); // hint is forwarded unchanged; it is not interpreted here
}
bool can_inplace_rewrite(Transaction& t,
placement_hint_t hint;
rewrite_gen_t gen;
bool is_tracked;
+ paddr_t paddr_hint = P_ADDR_NULL;
write_policy_t write_policy = write_policy_t::WRITE_BACK;
#ifdef UNIT_TESTS_BUILT
std::optional<paddr_t> external_paddr = std::nullopt;
#endif
{
assert(category == data_category_t::DATA);
- auto addrs = get_writer(opt.hint, category, opt.gen)->alloc_paddrs(length);
+ auto addrs = get_writer(opt.hint, category, opt.gen)->alloc_paddrs(
+ length, opt.paddr_hint);
for (auto &ext : addrs) {
auto left = ext.len;
while (left > 0) {
using allocate_ret_bare = std::list<alloc_paddr_result>;
using allo_extents_ret = allocate_ertr::future<allocate_ret_bare>;
- virtual allocate_ret_bare alloc_extents(size_t size) = 0;
+ virtual allocate_ret_bare alloc_extents(size_t size, paddr_t hint) = 0;
virtual void mark_space_used(paddr_t paddr, size_t len) = 0;
virtual void mark_space_free(paddr_t paddr, size_t len) = 0;
if (p != extent_size_tree.rend()) {
max_size = p->end - p->start;
}
+ const auto compare = extent_tree.key_comp();
+ auto rs = extent_tree.lower_bound(extent_range_t{start, size}, compare);
+ if (rs != extent_tree.end()) {
+ uint64_t offset = rs->start;
+ if (offset + size <= rs->end) {
+ start = offset;
+ return size;
+ }
+ }
assert(max_size);
if (max_size <= size) {
}
std::optional<interval_set<rbm_abs_addr>> AvlAllocator::alloc_extents(
- size_t size)
+ size_t size, rbm_abs_addr hint)
{
LOG_PREFIX(AvlAllocator::alloc_extents);
if (available_size < size) {
interval_set<rbm_abs_addr> result;
- auto try_to_alloc_block = [this, &result, FNAME] (uint64_t alloc_size)
+ auto try_to_alloc_block = [this, hint, &result, FNAME] (uint64_t alloc_size)
{
+ rbm_abs_addr start = hint;
while (alloc_size) {
- rbm_abs_addr start = 0;
extent_len_t len = find_block(std::min(max_alloc_size, alloc_size), start);
ceph_assert(len);
_remove_from_tree(start, len);
std::optional<interval_set<rbm_abs_addr>> alloc_extent(
size_t size) final;
std::optional<interval_set<rbm_abs_addr>> alloc_extents(
- size_t size) final;
+ size_t size, rbm_abs_addr hint) final;
void free_extent(rbm_abs_addr addr, size_t size) final;
void mark_extent_used(rbm_abs_addr addr, size_t size) final;
}
BlockRBManager::allocate_ret_bare
-BlockRBManager::alloc_extents(size_t size)
+BlockRBManager::alloc_extents(size_t size, paddr_t hint)
{
LOG_PREFIX(BlockRBManager::alloc_extents);
assert(allocator);
- auto alloc = allocator->alloc_extents(size);
+ rbm_abs_addr rbm_hint =
+ (hint == P_ADDR_NULL ? 0 : convert_paddr_to_abs_addr(hint));
+ auto alloc = allocator->alloc_extents(size, rbm_hint);
if (!alloc) {
return {};
}
*/
paddr_t alloc_extent(size_t size) override; // allocator, return blocks
- allocate_ret_bare alloc_extents(size_t size) override; // allocator, return blocks
+ allocate_ret_bare alloc_extents(
+ size_t size, paddr_t hint) override; // allocator, return blocks
void complete_allocation(paddr_t addr, size_t size) override;
*
*/
virtual std::optional<interval_set<rbm_abs_addr>> alloc_extents(
- size_t size) = 0;
+ size_t size, rbm_abs_addr hint) = 0;
/**
* free_extent
return cache->get_next_dirty_extents(t, seq, max_bytes);
}
-TransactionManager::rewrite_extent_ret
+TransactionManager::rewrite_extent_iertr::future<
+ std::vector<CachedExtentRef>>
TransactionManager::rewrite_logical_extent(
Transaction& t,
- LogicalChildNodeRef extent)
+ LogicalChildNodeRef extent,
+ paddr_t paddr_hint)
{
LOG_PREFIX(TransactionManager::rewrite_logical_extent);
if (extent->has_been_invalidated()) {
extent->get_user_hint(),
// get target rewrite generation
extent->get_rewrite_generation(),
+ paddr_hint,
is_tracked)->cast<LogicalChildNode>();
nextent->rewrite(t, *extent, 0);
extent->get_paddr(),
*nextent
);
+ co_return std::vector<CachedExtentRef>{nextent};
} else {
assert(get_extent_category(extent->get_type()) == data_category_t::DATA);
extent->get_user_hint(),
// get target rewrite generation
extent->get_rewrite_generation(),
+ paddr_hint,
is_tracked);
extent_len_t off = 0;
auto left = extent->get_length();
off += nextent->get_length();
left -= nextent->get_length();
}
+ co_return std::move(extents);
}
}
auto fut = rewrite_extent_iertr::now();
if (extent->is_logical()) {
assert(is_logical_type(extent->get_type()));
- fut = rewrite_logical_extent(t, extent->cast<LogicalChildNode>());
+ fut = rewrite_logical_extent(
+ t, extent->cast<LogicalChildNode>(), P_ADDR_NULL
+ ).discard_result();
} else if (is_backref_node(extent->get_type())) {
fut = backref_manager->rewrite_extent(t, extent);
} else {
orig_ext->get_length(),
placement_hint_t::HOT,
INIT_GENERATION,
+ P_ADDR_NULL,
true);
t.touch_laddr_prefix(orig_ext->get_laddr().get_object_prefix());
orig_ext->get_length(),
placement_hint_t::HOT,
INIT_GENERATION,
+ P_ADDR_NULL,
true);
auto lext = promoted_extent->cast<LogicalChildNode>();
lext->set_laddr(orig_ext->get_laddr());
t, mapping, std::move(promoted_extents));
}
+TransactionManager::rewrite_extents_ret TransactionManager::rewrite_extents(
+ Transaction &t, ///< [in] transaction every rewrite below is issued against
+ std::vector<CachedExtentRef> &extents, ///< [in,out] extents to rewrite; a visited entry is reassigned (see `extent = updated`)
+ rewrite_gen_t target_generation, ///< [in] generation set on extents that are not stable-dirty
+ sea_time_point modify_time) ///< [in] modify time set on those same extents; asserted != NULL_TIME there
+{ /*
+ * Visits each entry of `extents` via trans_intr::do_for_each and rewrites
+ * it inside `t`. Per entry, in this order:
+ *   1. refresh the ref through cache->update_extent_from_transaction();
+ *      a null result is logged as "already retired" and the entry is skipped
+ *   2. stable-dirty extents: either registered for in-place rewrite (done),
+ *      or accounted in the rewrite stats and targeted at INIT_GENERATION;
+ *      all other extents get target_generation and modify_time
+ *   3. dispatch: backref node -> backref_manager, ROOT -> duplicate_for_write,
+ *      logical -> rewrite_logical_extent, anything else -> lba_manager
+ *
+ * Two values are shared by all iterations (held by seastar::do_with):
+ *   paddr_hint - handed to rewrite_logical_extent; starts as P_ADDR_NULL and
+ *                is then set to paddr + length of the last extent that call
+ *                returned
+ *   next_laddr - laddr + length of the previously handled logical extent;
+ *                if the next logical extent starts elsewhere, paddr_hint is
+ *                reset to P_ADDR_NULL
+ * Only the logical branch reads or writes these two values.
+ *
+ * NOTE(review): the inner lambda has several return statements and no
+ * explicit return type, so all of them must deduce to one future type -
+ * confirm against the rewrite_extent_iertr definitions.
+ */
+ LOG_PREFIX(TransactionManager::rewrite_extents);
+ return seastar::do_with(
+ P_ADDR_NULL, // initial paddr_hint
+ L_ADDR_NULL, // initial next_laddr
+ [this, &t, target_generation, modify_time, &extents, FNAME]
+ (auto &paddr_hint, auto &next_laddr) {
+ return trans_intr::do_for_each(
+ extents,
+ [this, &t, target_generation, modify_time, FNAME,
+ &paddr_hint, &next_laddr](auto &extent) {
+ {
+ auto updated = cache->update_extent_from_transaction(t, extent);
+ if (!updated) { // nothing returned: treated as retired, no rewrite is issued
+ DEBUGT("extent is already retired, skipping -- {}", t, *extent);
+ return rewrite_extent_iertr::now();
+ }
+ extent = updated; // `extent` is an auto& parameter, so this assignment writes through the reference
+ ceph_assert(!extent->is_pending_io());
+ }
+
+ assert(extent->is_valid() && !extent->is_initial_pending());
+ if (extent->is_stable_dirty()) {
+ if (epm->can_inplace_rewrite(t, extent)) { // in-place path: register with the transaction and stop here
+ DEBUGT("delta overwriting extent -- {}", t, *extent);
+ t.add_inplace_rewrite_extent(extent);
+ extent->set_inplace_rewrite_generation();
+ return rewrite_extent_iertr::now();
+ }
+ if (extent->get_version() == 1 && extent->has_mutation()) {
+ t.get_rewrite_stats().account_n_dirty();
+ } else {
+ // extent->get_version() > 1 or DIRTY
+ t.get_rewrite_stats().account_dirty(extent->get_version());
+ }
+ extent->set_target_rewrite_generation(INIT_GENERATION); // target_generation is not applied to stable-dirty extents
+ } else {
+ extent->set_target_rewrite_generation(target_generation);
+ ceph_assert(modify_time != NULL_TIME);
+ extent->set_modify_time(modify_time);
+ }
+
+ if (is_backref_node(extent->get_type())) {
+ DEBUGT("rewriting backref extent -- {}", t, *extent);
+ return backref_manager->rewrite_extent(t, extent);
+ }
+
+ if (extent->get_type() == extent_types_t::ROOT) {
+ DEBUGT("rewriting root extent -- {}", t, *extent);
+ cache->duplicate_for_write(t, extent); // ROOT is only duplicated for write; no further call is made for it
+ return rewrite_extent_iertr::now();
+ }
+
+ if (extent->is_logical()) {
+ auto ext = extent->template cast<LogicalChildNode>();
+ if (next_laddr != ext->get_laddr()) { // does not start where the previous logical extent ended
+ paddr_hint = P_ADDR_NULL;
+ }
+ next_laddr = (ext->get_laddr() + ext->get_length()).checked_to_laddr(); // laddr at which a contiguous successor would begin
+ return rewrite_logical_extent(t, ext, paddr_hint
+ ).si_then([&paddr_hint](auto nlextents) {
+ for (auto &nlextent : nlextents) { // assigned on every iteration, so the last returned extent wins
+ paddr_hint = nlextent->get_paddr() + nlextent->get_length();
+ }
+ });
+ } else {
+ DEBUGT("rewriting physical extent -- {}", t, *extent);
+ return lba_manager->rewrite_extent(t, extent);
+ }
+ });
+ });
+}
TransactionManager::demote_region_ret
TransactionManager::demote_region(
Transaction &t,
demote_region_iertr::pass_further{},
crimson::ct_error::assert_all("unexpected enoent"));
demote_region_res_t ret{0, 0, false};
+ std::vector<CachedExtentRef> extents;
while ((ret.demoted_size + ret.evicted_size) < max_proceed_size) {
if (it.is_end() || it.get_key().get_object_prefix() != prefix) {
ret.complete = true;
auto extent = co_await read_cursor_by_type(
t, it.direct_cursor, it.get_extent_type());
ret.evicted_size += extent->get_length();
- extent->set_target_rewrite_generation(epm->get_max_hot_gen() + 1);
- co_await rewrite_logical_extent(t, extent);
+ extents.push_back(extent);
it = co_await it.next();
} else {
DEBUGT("skip {}", t, it);
}
}
+ co_await rewrite_extents(
+ t, extents, epm->get_max_hot_gen() + 1,
+ seastar::lowres_system_clock::now());
+
co_return ret;
}
auto exts = cache->alloc_new_data_extents<T>(
t, len,
{
- placement_hint, INIT_GENERATION, false,
+ placement_hint, INIT_GENERATION, false, P_ADDR_NULL,
epm->get_write_policy(T::TYPE, len)
});
// user must initialize the logical extent themselves
rewrite_gen_t target_generation,
sea_time_point modify_time) final;
+ using ExtentCallbackInterface::rewrite_extents_ret;
+ rewrite_extents_ret rewrite_extents(
+ Transaction &t,
+ std::vector<CachedExtentRef> &extents, // taken by non-const reference: the definition reassigns entries it visits
+ rewrite_gen_t target_generation, // the definition applies this only to extents that are not stable-dirty
+ sea_time_point modify_time) final;
+
using ExtentCallbackInterface::promote_extent_ret;
promote_extent_ret promote_extent(
Transaction &t,
Transaction &t,
LBAMapping mapping);
- rewrite_extent_ret rewrite_logical_extent(
+ rewrite_extent_iertr::future<std::vector<CachedExtentRef>> // the definition co_returns the newly allocated extent(s)
+ rewrite_logical_extent(
Transaction& t,
- LogicalChildNodeRef extent);
+ LogicalChildNodeRef extent,
+ paddr_t hint); // named paddr_hint in the definition, where it is passed on to the extent allocation calls
submit_transaction_direct_ret do_submit_transaction(
Transaction &t,
lba_btree_update([=, this](auto &btree, auto &t) {
auto extents = cache->alloc_new_data_extents<TestBlock>(
t, TestBlock::SIZE,
- {placement_hint_t::HOT, 0, false,
+ {placement_hint_t::HOT, 0, false, P_ADDR_NULL,
write_policy_t::WRITE_BACK, get_paddr()});
return seastar::do_with(
std::move(extents),
[=, this](auto &t) {
auto extents = cache->alloc_new_data_extents<TestBlock>(
t, TestBlock::SIZE,
- {placement_hint_t::HOT, 0, false,
+ {placement_hint_t::HOT, 0, false, P_ADDR_NULL,
write_policy_t::WRITE_BACK, get_paddr()});
return seastar::do_with(
std::vector<LogicalChildNodeRef>(
return allocator->alloc_extent(size);
}
auto allocates(size_t size) {
- return allocator->alloc_extents(size);
+ return allocator->alloc_extents(size, 0); // 0 is the same hint value BlockRBManager::alloc_extents passes for P_ADDR_NULL
}
void free(uint64_t start, uint64_t length) {
allocator->free_extent(start, length);