if (i->is_pending())
return i;
+ if (i->is_exist_clean()) {
+ i->version++;
+ i->state = CachedExtent::extent_state_t::EXIST_MUTATION_PENDING;
+ i->last_committed_crc = i->get_crc32c();
+ t.add_mutated_extent(i);
+ DEBUGT("duplicate existing extent {}", t, *i);
+ return i;
+ }
+
auto ret = i->duplicate_for_write();
ret->prior_instance = i;
t.add_mutated_extent(ret);
DEBUGT("invalid mutated extent -- {}", t, *i);
continue;
}
- assert(i->prior_instance);
+ assert(i->is_exist_mutation_pending() ||
+ i->prior_instance);
get_by_ext(efforts.mutate_by_ext,
i->get_type()).increment(i->get_length());
auto delta_bl = i->get_delta();
auto delta_length = delta_bl.length();
- DEBUGT("mutated extent with {}B delta, commit replace extent ... -- {}, prior={}",
- t, delta_length, *i, *i->prior_instance);
i->set_modify_time(commit_time);
- commit_replace_extent(t, i, i->prior_instance);
+ DEBUGT("mutated extent with {}B delta -- {}",
+ t, delta_length, *i);
+ if (!i->is_exist_mutation_pending()) {
+ DEBUGT("commit replace extent ... -- {}, prior={}",
+ t, *i, *i->prior_instance);
+ // extents in EXIST_MUTATION_PENDING don't have a
+ // prior_instance, so skip commit_replace_extent for them.
+ // the existing extents should be added into Cache
+ // during complete_commit to sync with the gc transaction.
+ commit_replace_extent(t, i, i->prior_instance);
+ }
i->prepare_write();
i->set_io_wait();
i->get_type());
}
}
+
+ for (auto &i: t.existing_block_list) {
+ if (i->is_valid()) {
+ alloc_delta.alloc_blk_ranges.emplace_back(
+ i->get_paddr(),
+ i->cast<LogicalCachedExtent>()->get_laddr(),
+ i->get_length(),
+ i->get_type());
+ }
+ }
alloc_deltas.emplace_back(std::move(alloc_delta));
for (auto b : alloc_deltas) {
i->get_length());
}
if (is_backref_mapped_extent_node(i)) {
+ DEBUGT("backref_list new {} len {}",
+ t,
+ i->get_paddr(),
+ i->get_length());
backref_list.emplace_back(
std::make_unique<backref_buf_entry_t>(
i->get_paddr(),
if (!i->is_valid()) {
continue;
}
- assert(i->prior_instance);
+ assert(i->is_exist_mutation_pending() ||
+ i->prior_instance);
i->on_delta_write(final_block_start);
i->prior_instance = CachedExtentRef();
i->state = CachedExtent::extent_state_t::DIRTY;
i->get_paddr(),
i->get_length());
}
+ for (auto &i: t.existing_block_list) {
+ if (i->is_valid()) {
+ cleaner->mark_space_used(
+ i->get_paddr(),
+ i->get_length());
+ }
+ }
}
for (auto &i: t.mutated_block_list) {
i->dirty_from_or_retired_at = last_commit;
if (is_backref_mapped_extent_node(i)
|| is_retired_placeholder(i->get_type())) {
+ DEBUGT("backref_list free {} len {} should release {}",
+ t,
+ i->get_paddr(),
+ i->get_length(),
+ t.should_record_release(i->get_paddr()));
if (t.should_record_release(i->get_paddr())) {
backref_list.emplace_back(
std::make_unique<backref_buf_entry_t>(
ceph_abort("not possible");
}
}
+
+ auto existing_stats = t.get_existing_block_stats();
+ DEBUGT("total existing blocks num: {}, exist clean num: {}, "
+ "exist mutation pending num: {}",
+ t,
+ existing_stats.valid_num,
+ existing_stats.clean_num,
+ existing_stats.mutated_num);
+ for (auto &i: t.existing_block_list) {
+ if (i->is_valid()) {
+ if (i->is_exist_clean()) {
+ i->state = CachedExtent::extent_state_t::CLEAN;
+ } else {
+ assert(i->state == CachedExtent::extent_state_t::DIRTY);
+ }
+ DEBUGT("backref_list new existing {} len {}",
+ t,
+ i->get_paddr(),
+ i->get_length());
+ backref_list.emplace_back(
+ std::make_unique<backref_buf_entry_t>(
+ i->get_paddr(),
+ i->cast<LogicalCachedExtent>()->get_laddr(),
+ i->get_length(),
+ i->get_type(),
+ seq));
+ add_extent(i);
+ }
+ }
if (!backref_list.empty())
backref_batch_update(std::move(backref_list), seq);
}
return !is_zero() && !is_null();
}
+ bool is_absolute() const {
+ return get_device_id() <= DEVICE_ID_MAX_VALID;
+ }
+
DENC(paddr_t, v, p) {
DENC_START(1, 1, p);
denc(v.dev_addr, p);
};
get_extent_ret get_extent(paddr_t addr, CachedExtentRef *out) {
LOG_PREFIX(Transaction::get_extent);
- if (retired_set.count(addr)) {
- return get_extent_ret::RETIRED;
- } else if (auto iter = write_set.find_offset(addr);
+ // it's possible for both write_set and retired_set to
+ // contain this addr at the same time, when addr is absolute
+ // and the corresponding extent is used to map an existing
+ // extent on disk. So search write_set first.
+ if (auto iter = write_set.find_offset(addr);
iter != write_set.end()) {
if (out)
*out = CachedExtentRef(&*iter);
SUBTRACET(seastore_cache, "{} is present in write_set -- {}",
*this, addr, *iter);
+ assert((*out)->is_valid());
return get_extent_ret::PRESENT;
+ } else if (retired_set.count(addr)) {
+ return get_extent_ret::RETIRED;
} else if (
auto iter = read_set.find(addr);
iter != read_set.end()) {
void add_to_retired_set(CachedExtentRef ref) {
ceph_assert(!is_weak());
- if (ref->is_initial_pending()) {
+ if (ref->is_exist_clean() ||
+ ref->is_exist_mutation_pending()) {
+ existing_block_stats.dec(ref);
+ ref->state = CachedExtent::extent_state_t::INVALID;
+ write_set.erase(*ref);
+ } else if (ref->is_initial_pending()) {
ref->state = CachedExtent::extent_state_t::INVALID;
write_set.erase(*ref);
} else if (ref->is_mutation_pending()) {
void add_fresh_extent(
CachedExtentRef ref) {
ceph_assert(!is_weak());
- if (ref->get_paddr().is_delayed()) {
+ if (ref->is_exist_clean()) {
+ existing_block_stats.inc(ref);
+ existing_block_list.push_back(ref);
+ } else if (ref->get_paddr().is_delayed()) {
assert(ref->get_paddr() == make_delayed_temp_paddr(0));
assert(ref->is_logical());
ref->set_paddr(make_delayed_temp_paddr(delayed_temp_offset));
delayed_temp_offset += ref->get_length();
delayed_alloc_list.emplace_back(ref->cast<LogicalCachedExtent>());
+ fresh_block_stats.increment(ref->get_length());
} else {
assert(ref->get_paddr() == make_record_relative_paddr(0));
ref->set_paddr(make_record_relative_paddr(offset));
offset += ref->get_length();
inline_block_list.push_back(ref);
+ fresh_block_stats.increment(ref->get_length());
}
- fresh_block_stats.increment(ref->get_length());
write_set.insert(*ref);
if (is_backref_node(ref->get_type()))
fresh_backref_extents++;
void add_mutated_extent(CachedExtentRef ref) {
ceph_assert(!is_weak());
- assert(read_set.count(ref->prior_instance->get_paddr()));
+ assert(ref->is_exist_mutation_pending() ||
+ read_set.count(ref->prior_instance->get_paddr()));
mutated_block_list.push_back(ref);
- write_set.insert(*ref);
+ if (!ref->is_exist_mutation_pending()) {
+ write_set.insert(*ref);
+ } else {
+ assert(write_set.find_offset(ref->get_paddr()) !=
+ write_set.end());
+ }
}
void replace_placeholder(CachedExtent& placeholder, CachedExtent& extent) {
return mutated_block_list;
}
+ const auto &get_existing_block_list() {
+ return existing_block_list;
+ }
+
const auto &get_retired_set() {
return retired_set;
}
+ bool is_retired(laddr_t laddr, extent_len_t len, paddr_t paddr) {
+ if (retired_set.empty()) {
+ return false;
+ }
+ auto iter = retired_set.lower_bound(paddr);
+ if (iter == retired_set.end() ||
+ (*iter)->get_paddr() > paddr) {
+ assert(iter != retired_set.begin());
+ --iter;
+ }
+
+ auto lextent = (*iter)->cast<LogicalCachedExtent>();
+ auto ext_laddr = lextent->get_laddr();
+ return ext_laddr <= laddr &&
+ ext_laddr + lextent->get_length() >= laddr + len;
+ }
+
bool should_record_release(paddr_t addr) {
auto count = no_release_delta_retired_set.count(addr);
#ifndef NDEBUG
ool_block_list.clear();
retired_set.clear();
no_release_delta_retired_set.clear();
+ existing_block_list.clear();
+ existing_block_stats = {};
onode_tree_stats = {};
omap_tree_stats = {};
lba_tree_stats = {};
return rewrite_version_stats;
}
+ struct existing_block_stats_t {
+ uint64_t valid_num = 0;
+ uint64_t clean_num = 0;
+ uint64_t mutated_num = 0;
+ void inc(const CachedExtentRef &ref) {
+ valid_num++;
+ if (ref->is_exist_clean()) {
+ clean_num++;
+ } else {
+ mutated_num++;
+ }
+ }
+ void dec(const CachedExtentRef &ref) {
+ valid_num--;
+ if (ref->is_exist_clean()) {
+ clean_num--;
+ } else {
+ mutated_num--;
+ }
+ }
+ };
+ existing_block_stats_t& get_existing_block_stats() {
+ return existing_block_stats;
+ }
+
private:
friend class Cache;
friend Ref make_test_transaction();
/// list of mutated blocks, holds refcounts, subset of write_set
std::list<CachedExtentRef> mutated_block_list;
+ /// extents mapping data already existing on disk, with data and refcounts
+ std::list<CachedExtentRef> existing_block_list;
+ existing_block_stats_t existing_block_stats;
+
/**
* retire_set
*
// ...but add_pin from parent->leaf
std::vector<CachedExtentRef> lba_to_link;
std::vector<CachedExtentRef> backref_to_link;
- lba_to_link.reserve(tref.get_fresh_block_stats().num);
+ lba_to_link.reserve(tref.get_fresh_block_stats().num +
+ tref.get_existing_block_stats().valid_num);
backref_to_link.reserve(tref.get_fresh_block_stats().num);
tref.for_each_fresh_block([&](auto &e) {
if (e->is_valid()) {
}
});
+ for (auto &e: tref.get_existing_block_list()) {
+ if (e->is_valid()) {
+ lba_to_link.push_back(e);
+ }
+ }
+
lba_manager->complete_transaction(tref, lba_to_clear, lba_to_link);
backref_manager->complete_transaction(tref, backref_to_clear, backref_to_link);
});
}
+ /**
+ * map_existing_extent
+ *
+ * Allocates a new extent at given existing_paddr that must be absolute and
+ * reads disk to fill the extent.
+ * The common usage is to remove the original LogicalCachedExtent
+ * (laddr~length at paddr) and remap that region to multiple new extents.
+ * placement_hint and generation should follow the original extent.
+ */
+ using map_existing_extent_iertr =
+ alloc_extent_iertr::extend_ertr<Device::read_ertr>;
+ template <typename T>
+ using map_existing_extent_ret =
+ map_existing_extent_iertr::future<TCachedExtentRef<T>>;
+ template <typename T>
+ map_existing_extent_ret<T> map_existing_extent(
+ Transaction &t,
+ laddr_t laddr_hint,
+ paddr_t existing_paddr,
+ extent_len_t length,
+ placement_hint_t placement_hint = placement_hint_t::HOT,
+ reclaim_gen_t gen = DIRTY_GENERATION) {
+ LOG_PREFIX(TransactionManager::map_existing_extent);
+ ceph_assert(existing_paddr.is_absolute());
+ assert(t.is_retired(laddr_hint, length, existing_paddr));
+
+ auto bp = ceph::bufferptr(buffer::create_page_aligned(length));
+ bp.zero();
+
+ // ExtentPlacementManager::alloc_new_extent would assign a new
+ // (relative/temp) paddr, so construct the extent directly instead
+ auto ext = CachedExtent::make_cached_extent_ref<T>(std::move(bp));
+
+ ext->init(CachedExtent::extent_state_t::EXIST_CLEAN,
+ existing_paddr,
+ placement_hint,
+ gen);
+
+ t.add_fresh_extent(ext);
+
+ return lba_manager->alloc_extent(
+ t,
+ laddr_hint,
+ length,
+ existing_paddr
+ ).si_then([ext=std::move(ext), laddr_hint, &t, this, FNAME](auto &&ref) {
+ SUBDEBUGT(seastore_tm, "map existing extent: {}, laddr_hint: {} pin: {}",
+ t, *ext, laddr_hint, *ref);
+ ceph_assert(laddr_hint == ref->get_key());
+ ext->set_pin(std::move(ref));
+ return epm->read(
+ ext->get_paddr(),
+ ext->get_length(),
+ ext->get_bptr()
+ ).safe_then([ext=std::move(ext)] {
+ return map_existing_extent_iertr::make_ready_future<TCachedExtentRef<T>>
+ (std::move(ext));
+ });
+ });
+ }
+
+
using reserve_extent_iertr = alloc_extent_iertr;
using reserve_extent_ret = reserve_extent_iertr::future<LBAPinRef>;
reserve_extent_ret reserve_region(