if (i->is_exist_clean()) {
assert(i->version == 0);
- assert(!i->prior_instance);
+ assert(!i->prior_instance || t.get_src() == transaction_type_t::DEMOTE);
// no set_io_wait(), skip complete_commit()
assert(!i->is_pending_io());
i->pending_for_transaction = TRANS_ID_NULL;
should_use_no_conflict_publish(t.get_src(), i->get_type()));
}
- // exist mutation pending extents must be in t.mutated_block_list
- add_extent(i);
- const auto t_src = t.get_src();
- if (i->is_stable_dirty()) {
- add_to_dirty(i, &t_src);
+ assert(i->is_logical());
+ if (t.get_src() == transaction_type_t::DEMOTE) {
+ assert(!i->committer);
+ assert(!i->get_prior_instance()->committer);
+ i->new_committer(t);
+ assert(i->committer);
+ i->get_prior_instance()->committer = i->committer;
+ auto &committer = *i->committer;
+ committer.block_trans(t);
+ i->get_prior_instance()->set_io_wait(
+ CachedExtent::extent_state_t::CLEAN, true);
} else {
- touch_extent_fully(*i, &t_src, t.get_cache_hint());
+ // exist mutation pending extents must be in t.mutated_block_list
+ add_extent(i);
+ const auto t_src = t.get_src();
+ if (i->is_stable_dirty()) {
+ add_to_dirty(i, &t_src);
+ } else {
+ touch_extent_fully(*i, &t_src, t.get_cache_hint());
+ }
}
alloc_delta.alloc_blk_ranges.emplace_back(
if (is_lba_backref_node(i->get_type())) {
committer.commit_data();
}
+ if (i->is_logical() &&
+ t.get_src() == transaction_type_t::PROMOTE) {
+ committer.commit_shadow_promote(t);
+ }
touch_extent_fully(prior, &t_src, t.get_cache_hint());
committer.sync_version();
committer.unblock_trans(t);
continue;
}
epm.mark_space_used(i->get_paddr(), i->get_length());
+ assert(i->is_logical());
+ auto t_src = t.get_src();
+ if (t.get_src() == transaction_type_t::DEMOTE) {
+ assert(i->committer);
+ auto &committer = *i->committer;
+ auto &prior = static_cast<LogicalChildNode&>(
+ *i->get_prior_instance());
+ ceph_assert(prior.is_valid());
+ TRACET("committing rewritten extent into "
+ "existing -- {}, prior={}",
+ t, *i, prior);
+ prior.pending_for_transaction = TRANS_ID_NULL;
+ if (auto shadow = prior.get_shadow(); shadow) {
+ committer.commit_shadow_demote(t);
+ prior.reset_shadow();
+ }
+ committer.commit_state();
+ committer.sync_checksum();
+ committer.commit_and_share_paddr();
+ touch_extent_fully(prior, &t_src, t.get_cache_hint());
+ committer.sync_version();
+ committer.unblock_trans(t);
+ prior.complete_io();
+ i->committer.reset();
+ prior.committer.reset();
+ }
}
for (auto &i: t.pre_alloc_list) {
if (!i->is_valid()) {
read_extent_futs, [](auto &fut) { return std::move(fut); });
}
+ // Tell whether the device that backs `paddr` is reported as a cold
+ // device by the extent placement manager (epm).
+ bool is_on_cold_tier(paddr_t paddr) const {
+   const auto device_id = paddr.get_device_id();
+   return epm.is_cold_device(device_id);
+ }
private:
void touch_extent_fully(
CachedExtent &ext,
}
}
+// DEMOTE-only step (asserted): take the shadow extent attached to the prior
+// instance of the rewritten extent and remove it from the retired set of
+// every transaction listed in the prior instance's retired_transactions,
+// except the committing transaction `t` itself.
+void ExtentCommitter::commit_shadow_demote(Transaction &t) {
+  LOG_PREFIX(ExtentCommitter::commit_shadow_demote);
+  assert(t.get_src() == transaction_type_t::DEMOTE);
+  auto &demoted = *extent.prior_instance->template cast<LogicalChildNode>();
+  auto shadow = demoted.get_shadow();
+  // the caller is expected to invoke this only when a shadow is attached
+  assert(shadow);
+  const auto self_tid = t.get_trans_id();
+  for (auto &view : demoted.retired_transactions) {
+    assert(view.t != nullptr);
+    auto other_tid = view.t->get_trans_id();
+    if (other_tid != self_tid) {
+      TRACET("removing shadow {} from retired_set of t.{}", t, *shadow, other_tid);
+      // every other viewing transaction must have had the shadow retired
+      [[maybe_unused]] bool removed =
+        view.t->remove_from_retired_set(*shadow);
+      assert(removed);
+    }
+  }
+}
+
+// PROMOTE-only step (asserted): copy the shadow handle of the rewritten
+// extent onto its prior instance, then add that shadow to the retired set
+// of every transaction listed in the prior instance's retired_transactions,
+// except the committing transaction `t` itself.
+void ExtentCommitter::commit_shadow_promote(Transaction &t) {
+  LOG_PREFIX(ExtentCommitter::commit_shadow_promote);
+  assert(t.get_src() == transaction_type_t::PROMOTE);
+  assert(extent.is_logical());
+  auto &promoted = static_cast<LogicalChildNode&>(extent);
+  auto &stable = static_cast<LogicalChildNode&>(*extent.prior_instance);
+  auto shadow = promoted.get_shadow();
+  // the rewritten extent must already carry its shadow
+  assert(shadow);
+  stable.set_shadow(shadow);
+  const auto self_tid = t.get_trans_id();
+  for (auto &view : stable.retired_transactions) {
+    assert(view.t != nullptr);
+    auto other_tid = view.t->get_trans_id();
+    if (other_tid != self_tid) {
+      TRACET("adding shadow {} from t.{}", t, *shadow, other_tid);
+      view.t->add_absent_to_retired_set(shadow);
+    }
+  }
+}
+
}
void commit_and_share_paddr();
+ void commit_shadow_demote(Transaction&);
+ void commit_shadow_promote(Transaction&);
private:
// the rewritten extent
CachedExtent &extent;
return this->_update_mapping(
c.trans,
*cursor,
- [prev_addr, addr, len, checksum](
+ [prev_addr, addr, len, checksum, extent, c](
const lba_map_val_t &in) {
lba_map_val_t ret = in;
ceph_assert(in.pladdr.is_paddr());
- ceph_assert(in.pladdr.get_paddr() == prev_addr);
ceph_assert(in.len == len);
- ret.pladdr = addr;
+ if (likely(in.pladdr.get_paddr() == prev_addr)) {
+ ret.pladdr = addr;
+ } else {
+ // this can only happen when the extent is EXIST_CLEAN
+ // and is demoted onto the cold tier by a DEMOTE trans.
+ assert(in.shadow_paddr == P_ADDR_NULL);
+ assert(extent->is_exist_clean());
+ assert(extent->get_paddr() == in.pladdr.get_paddr());
+ assert(c.cache.is_on_cold_tier(extent->get_paddr()));
+ assert(!c.cache.is_on_cold_tier(prev_addr));
+ }
ret.checksum = checksum;
return ret;
},
iterator &iter)
{
LOG_PREFIX(LBALeafNode::merge_content_to);
+ SUBTRACET(seastore_lba, "merging with {}", t, pending_version);
std::map<laddr_t, pladdr_t> modified;
auto it = pending_version.begin();
while (it != pending_version.end() && iter != this->end()) {
ceph_abort();
}
if (is_valid_child_ptr(child) &&
- (child->_is_mutable() || child->_is_pending_io())) {
- // skip the ones that the pending version is also modifying
+ (// skip the ones that the pending version is also modifying
+ (child->_is_mutable() || child->_is_pending_io()) ||
+ // EXIST_CLEAN extents created by DEMOTE transactions also
+ // update their paddrs, so they should also be skipped.
+ (pending_version.t->get_src() == transaction_type_t::DEMOTE))) {
+ SUBTRACET(seastore_lba, "skipping {}~{}", t, it->get_key(), it->get_val());
it++;
continue;
}
+ SUBTRACET(seastore_lba, "examing v2: {}~{}, v1: {}~{}",
+ t, it->get_key(), it->get_val(), iter->get_key(), iter->get_val());
auto pending_key = it->get_key();
auto stable_key = iter->get_key();
auto stable_end = stable_key + v1.len;
auto paddr = v1.pladdr.get_paddr();
paddr = paddr + off;
m_v2.pladdr = paddr;
+ if (v1.shadow_paddr == P_ADDR_NULL) {
+ m_v2.shadow_paddr = P_ADDR_NULL;
+ } else {
+ m_v2.shadow_paddr = (v1.shadow_paddr + off);
+ }
SUBTRACET(seastore_lba, "merging to {}, paddr: {} -> {}",
t, pending_version, m_v2.pladdr, paddr);
if (!is_valid_child_ptr(child) ||
laddr_t get_end() const {
return (get_laddr() + get_length()).checked_to_laddr();
}
+
+ // Return a copy of the handle to the shadow extent attached to this
+ // node; the handle is empty when no shadow has been set.
+ TCachedExtentRef<LogicalChildNode> get_shadow() const {
+   return this->shadow;
+ }
+
+ // Attach `s` as the shadow extent of this node. A shadow must not be
+ // attached already (asserted); call reset_shadow() first to replace one.
+ // The handle is only copied, so it is taken by const reference, which
+ // keeps lvalue callers working and additionally accepts temporaries.
+ void set_shadow(const TCachedExtentRef<LogicalChildNode> &s) {
+   assert(!shadow);
+   shadow = s;
+ }
+
+ // Detach the shadow extent, leaving this node's shadow handle empty.
+ void reset_shadow() {
+   this->shadow.reset();
+ }
+
protected:
void on_replace_prior(Transaction &t) final {
assert(is_seen_by_users());
void on_data_commit() final {
ceph_abort("impossible");
}
+private:
+ TCachedExtentRef<LogicalChildNode> shadow;
};
using LogicalChildNodeRef = TCachedExtentRef<LogicalChildNode>;
} // namespace crimson::os::seastore
constexpr bool is_rewrite_transaction(transaction_type_t type) {
return type == transaction_type_t::TRIM_DIRTY ||
type == transaction_type_t::CLEANER_MAIN ||
- type == transaction_type_t::CLEANER_COLD;
+ type == transaction_type_t::CLEANER_COLD ||
+ type == transaction_type_t::DEMOTE ||  // DEMOTE and PROMOTE are classified as rewrite transactions as well
+ type == transaction_type_t::PROMOTE;
}
constexpr bool is_trim_transaction(transaction_type_t type) {
}
}
+ // Erase from retired_set the entry found under ext's paddr.
+ // Returns true when an entry was erased; false when the lookup finds
+ // nothing or the found entry's extent reports a different paddr.
+ bool remove_from_retired_set(CachedExtent &ext) {
+   const auto target = ext.get_paddr();
+   auto pos = retired_set.find(target);
+   if (pos == retired_set.end() ||
+       pos->extent->get_paddr() != target) {
+     return false;
+   }
+   // an entry at the same paddr is expected to cover the same length
+   assert(ext.get_length() == pos->extent->get_length());
+   retired_set.erase(pos);
+   return true;
+ }
+
std::pair<bool, bool> pre_stable_extent_paddr_mod(
read_set_item_t<Transaction> &item)
{
auto laddr = ref->get_laddr();
cache->retire_absent_extent_addr_by_type(
t, laddr, shadow_addr, length, ref->get_type(),
- [laddr](auto &extent) {
+ [ref, laddr](auto &extent) {
auto lextent = extent.template cast<LogicalChildNode>();
assert(extent.is_logical());
assert(!lextent->has_laddr());
assert(!extent.has_been_invalidated());
lextent->set_laddr(laddr);
extent.set_shadow_extent(true);
+ ref->set_shadow(lextent);
});
}
}
LogicalChildNode
>();
ceph_assert(extent);
- cache->retire_extent(t, std::move(extent));
+ cache->retire_extent(t, extent);
+ if (mapping.has_shadow_val()) {
+ if (auto shadow = extent->get_shadow(); shadow) {
+ t.add_absent_to_retired_set(shadow);
+ } else {
+ auto laddr = mapping.get_intermediate_base();
+ std::ignore = cache->retire_absent_extent_addr_by_type(
+ t, laddr,
+ mapping.get_shadow_val(),
+ mapping.get_intermediate_length(),
+ mapping.get_extent_type(),
+ [extent, laddr](auto &ext) {
+ auto lextent = ext.template cast<LogicalChildNode>();
+ assert(ext.is_logical());
+ assert(!lextent->has_laddr());
+ assert(!ext.has_been_invalidated());
+ lextent->set_laddr(laddr);
+ ext.set_shadow_extent(true);
+ extent->set_shadow(lextent);
+ });
+ }
+ }
} else {
auto &child_pos = maybe_mapped_extent.get_child_pos();
auto laddr = mapping.get_intermediate_base();
- std::ignore = cache->retire_absent_extent_addr_by_type(
+ auto ext = cache->retire_absent_extent_addr_by_type(
t, laddr,
mapping.get_val(),
mapping.get_intermediate_length(),
child_pos.link_child(lextent.get());
lextent->set_laddr(laddr);
}
- );
- }
- if (mapping.has_shadow_val()) {
- cache->retire_absent_extent_addr(
- t, mapping.get_intermediate_base(),
- mapping.get_shadow_val(),
- mapping.get_intermediate_length());
+ )->template cast<LogicalChildNode>();
+ if (mapping.has_shadow_val()) {
+ std::ignore = cache->retire_absent_extent_addr_by_type(
+ t, mapping.get_intermediate_base(),
+ mapping.get_shadow_val(),
+ mapping.get_intermediate_length(),
+ mapping.get_extent_type(),
+ [laddr, ext](auto &extent) {
+ auto lextent = extent.template cast<LogicalChildNode>();
+ assert(extent.is_logical());
+ assert(!lextent->has_laddr());
+ assert(!extent.has_been_invalidated());
+ lextent->set_laddr(laddr);
+ extent.set_shadow_extent(true);
+ ext->set_shadow(lextent);
+ });
+ }
}
}
assert(mapping.has_shadow_val());
assert(!mapping.is_zero_reserved());
assert(mapping.is_viewable());
+ assert(t.get_src() == transaction_type_t::DEMOTE);
auto v = mapping.get_logical_extent(t);
- CachedExtentRef extent;
+ LogicalChildNodeRef extent;
auto laddr = mapping.get_key();
if (!v.has_child()) {
auto &child_pos = v.get_child_pos();
child_pos.link_child(lextent.get());
lextent->set_laddr(laddr);
}
- );
+ )->template cast<LogicalChildNode>();
} else {
- auto extent = co_await std::move(v.get_child_fut());
+ extent = co_await std::move(v.get_child_fut());
cache->retire_extent(t, extent);
}
- auto shadow_paddr = mapping.get_shadow_val();
- std::ignore = cache->retire_absent_extent_addr_by_type(
- t, laddr, shadow_paddr, mapping.get_length(), mapping.get_extent_type(),
- [laddr](auto &ext) {
- auto lextent = ext.template cast<LogicalChildNode>();
- assert(ext.is_logical());
- assert(!lextent->has_laddr());
- assert(!ext.has_been_invalidated());
- lextent->set_laddr(laddr);
- }
- );
- co_return cache->alloc_remapped_extent_by_type(
+ if (auto shadow = extent->get_shadow(); shadow) {
+ t.add_absent_to_retired_set(shadow);
+ } else {
+ auto shadow_paddr = mapping.get_shadow_val();
+ std::ignore = cache->retire_absent_extent_addr_by_type(
+ t, laddr, shadow_paddr, mapping.get_length(), mapping.get_extent_type(),
+ [laddr, extent](auto &ext) {
+ auto lextent = ext.template cast<LogicalChildNode>();
+ assert(ext.is_logical());
+ assert(!lextent->has_laddr());
+ assert(!ext.has_been_invalidated());
+ lextent->set_laddr(laddr);
+ ext.set_shadow_extent(true);
+ extent->set_shadow(lextent);
+ }
+ );
+ }
+ auto nextent = cache->alloc_remapped_extent_by_type(
t, mapping.get_extent_type(), laddr,
mapping.get_shadow_val(), 0, mapping.get_length(), std::nullopt
)->cast<LogicalChildNode>();
+ nextent->set_prior_instance(extent);
+ co_return nextent;
}
TransactionManager::submit_transaction_iertr::future<>
slice_length,
std::nullopt);
remapped_cold_extent->set_shadow_extent(true);
+ auto lremapped = remapped_cold_extent->template cast<LogicalChildNode>();
+ lext->set_shadow(lremapped);
offset += slice_length;
}
orig_ext->get_length(),
std::nullopt);
remapped_cold_extent->set_shadow_extent(true);
-
- remapped_cold_extent->set_shadow_extent(true);
+ auto lremapped = remapped_cold_extent->template cast<LogicalChildNode>();
+ lext->set_shadow(lremapped);
}
auto cursor = co_await lba_manager->get_cursor(
SUBTRACET(seastore_tm, "retire extent place holder...", t);
auto &child_pos = ret.get_child_pos();
auto laddr = pin.get_key();
- std::ignore = cache->retire_absent_extent_addr_by_type(
+ auto ext = cache->retire_absent_extent_addr_by_type(
t, laddr, original_paddr, original_len, pin.get_extent_type(),
[&child_pos, laddr](auto &extent) mutable {
auto lextent = extent.template cast<LogicalChildNode>();
child_pos.link_child(lextent.get());
lextent->set_laddr(laddr);
}
- );
+ )->template cast<LogicalChildNode>();
if (pin.has_shadow_val()) {
cache->retire_absent_extent_addr_by_type(
t, pin.get_key(), pin.get_shadow_val(),
original_len, pin.get_extent_type(),
- [laddr](auto &extent) {
+ [laddr, ext](auto &extent) {
auto lextent = extent.template cast<LogicalChildNode>();
assert(extent.is_logical());
assert(!lextent->has_laddr());
assert(!extent.has_been_invalidated());
lextent->set_laddr(laddr);
+ ext->set_shadow(lextent);
}
);
}