return v;
}
+// Reports whether the child slot this mapping refers to is stable, by
+// delegating to the parent node's is_child_stable(pos).
+// Preconditions (asserted): parent is set and valid, and pos is not the
+// uint16_t maximum value.
+template <typename key_t, typename val_t>
+bool BtreeNodeMapping<key_t, val_t>::is_stable() const
+{
+  assert(parent);
+  assert(parent->is_valid());
+  assert(pos != std::numeric_limits<uint16_t>::max());
+  // view the parent extent as the fixed-kv node holding the child slots
+  return ((FixedKVNode<key_t>&)*parent).is_child_stable(pos);
+}
+
template class BtreeNodeMapping<laddr_t, paddr_t>;
template class BtreeNodeMapping<paddr_t, laddr_t>;
} // namespace crimson::os::seastore
}
get_child_ret_t<LogicalCachedExtent> get_logical_extent(Transaction&) final;
+ bool is_stable() const final;
};
}
(get_node_size() - offset - 1) * sizeof(ChildableCachedExtent*));
}
- FixedKVNode& get_stable_for_key(node_key_t key) {
+ FixedKVNode& get_stable_for_key(node_key_t key) const {
ceph_assert(is_pending());
if (is_mutation_pending()) {
return (FixedKVNode&)*get_prior_instance();
virtual get_child_ret_t<LogicalCachedExtent>
get_logical_child(op_context_t<node_key_t> c, uint16_t pos) = 0;
+ virtual bool is_child_stable(uint16_t pos) const = 0;
+
template <typename T, typename iter_t>
get_child_ret_t<T> get_child(op_context_t<node_key_t> c, iter_t iter) {
auto pos = iter.get_offset();
return get_child_ret_t<LogicalCachedExtent>(child_pos_t(nullptr, 0));
}
+ bool is_child_stable(uint16_t pos) const final { // this override must never be reached; pos is ignored
+ ceph_abort("impossible"); // aborts unconditionally
+ return false; // presumably unreachable after the abort; satisfies the bool return type
+ }
+
bool validate_stable_children() final {
LOG_PREFIX(FixedKVInternalNode::validate_stable_children);
if (this->children.empty()) {
}
}
+ // A child is considered stable if any of the following cases is true:
+ // 1. it is not in memory
+ // 2. it is stable
+ // 3. it is mutation pending and under io
+ bool is_child_stable(uint16_t pos) const final {
+   // Cases 2 and 3 for a child that is present in memory.
+   auto in_memory_child_stable = [](const auto &ext) {
+     ceph_assert(ext->is_logical());
+     return ext->is_stable() ||
+       (ext->is_mutation_pending() &&
+        ext->is_pending_io());
+   };
+
+   // First consult this node's own child slot.
+   auto own_child = this->children[pos];
+   if (is_valid_child_ptr(own_child)) {
+     return in_memory_child_stable(own_child);
+   }
+   // No valid pointer here and this node is not pending: case 1.
+   if (!this->is_pending()) {
+     return true;
+   }
+
+   // This node is pending: look the key up in the node returned by
+   // get_stable_for_key() and inspect the child slot found there.
+   auto key = this->iter_idx(pos).get_key();
+   auto &sparent = this->get_stable_for_key(key);
+   auto spos = sparent.child_pos_for_key(key);
+   auto stable_child = sparent.children[spos];
+   if (!is_valid_child_ptr(stable_child)) {
+     return true;
+   }
+   return in_memory_child_stable(stable_child);
+ }
+
// Performs no checks: unconditionally returns true.
bool validate_stable_children() override {
return true;
}
// Streams rhs as "LBAMapping(<key>~<length>-><val>)". When rhs.is_indirect(),
// " indirect(<intermediate_base>~<intermediate_key>~<intermediate_length>)"
// is written before the closing parenthesis. Returns out.
std::ostream &operator<<(std::ostream &out, const LBAMapping &rhs)
{
- return out << "LBAMapping(" << rhs.get_key() << "~" << rhs.get_length()
- << "->" << rhs.get_val();
+ out << "LBAMapping(" << rhs.get_key() << "~" << rhs.get_length()
+ << "->" << rhs.get_val();
+ if (rhs.is_indirect()) {
+ out << " indirect(" << rhs.get_intermediate_base() << "~"
+ << rhs.get_intermediate_key() << "~"
+ << rhs.get_intermediate_length() << ")";
+ }
+ out << ")";
+ return out;
}
std::ostream &operator<<(std::ostream &out, const lba_pin_list_t &rhs)
// a rewrite extent has an invalid prior_instance,
// and a mutation_pending extent has a valid prior_instance
//
// Accessor: returns the prior_instance member as a CachedExtentRef.
- CachedExtentRef get_prior_instance() {
+ CachedExtentRef get_prior_instance() const {
return prior_instance;
}
child_pos->link_child(c);
}
+ virtual bool is_stable() const = 0;
+
virtual ~PhysicalNodeMapping() {}
protected:
std::optional<child_pos_t> child_pos = std::nullopt;
get_coll_context(t), cid, info.split_bits
).si_then([=, this, &t](auto result) {
assert(result == CollectionNode::create_result_t::SUCCESS);
- return tm.dec_ref(t, extent->get_laddr());
+ return tm.remove(t, extent->get_laddr());
}).si_then([] (auto) {
return create_iertr::make_ready_future<>();
});
pin->get_key(), pin->get_length(),
pin->get_raw_val().get_laddr());
auto &btree_new_pin = static_cast<BtreeLBAMapping&>(*new_pin);
- btree_new_pin.set_key_for_indirect(
+ btree_new_pin.make_indirect(
pin->get_key(),
pin->get_length(),
pin->get_raw_val().get_laddr());
c.trans, pin->get_raw_val().get_laddr()
).si_then([&pin](auto new_pin) {
ceph_assert(pin->get_length() == new_pin->get_length());
- new_pin->set_key_for_indirect(
+ new_pin->make_indirect(
pin->get_key(),
pin->get_length());
return new_pin;
extent_len_t len,
pladdr_t addr,
paddr_t actual_addr,
- laddr_t intermediate_base,
LogicalCachedExtent* nextent)
{
struct state_t {
LOG_PREFIX(BtreeLBAManager::_alloc_extent);
TRACET("{}~{}, hint={}", t, addr, len, hint);
+
+ ceph_assert(actual_addr != P_ADDR_NULL ? addr.is_laddr() : addr.is_paddr());
auto c = get_context(t);
++stats.num_alloc_extents;
auto lookup_attempts = stats.num_alloc_extents_iter_nexts;
state.ret = iter;
});
});
- }).si_then([c, actual_addr, addr, intermediate_base](auto &&state) {
- auto ret_pin = state.ret->get_pin(c);
- if (actual_addr != P_ADDR_NULL) {
- ceph_assert(addr.is_laddr());
- ret_pin->set_paddr(actual_addr);
- ret_pin->set_intermediate_base(intermediate_base);
- } else {
- ceph_assert(addr.is_paddr());
- }
- return alloc_extent_iertr::make_ready_future<LBAMappingRef>(
- std::move(ret_pin));
+ }).si_then([c](auto &&state) {
+ return alloc_extent_iertr::make_ready_future<
+ LBAMappingRef>(state.ret->get_pin(c));
});
}
return ret;
},
nextent
- ).si_then([&t, laddr, prev_addr, addr, FNAME](auto result) {
+ ).si_then([&t, laddr, prev_addr, addr, FNAME](auto p) {
+ auto &result = p.first;
DEBUGT("laddr={}, paddr {} => {} done -- {}",
t, laddr, prev_addr, addr, result);
},
return out;
},
nullptr
- ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto result) {
+ ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto p) {
+ auto &result = p.first;
+ auto &mapping = p.second;
DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, result);
auto fut = ref_iertr::make_ready_future<
std::optional<std::pair<paddr_t, extent_len_t>>>();
result.len
);
}
- return fut.si_then([result](auto removed) {
+ return fut.si_then([result, mapping=std::move(mapping)]
+ (auto removed) mutable {
if (result.pladdr.is_laddr()
&& removed) {
- return ref_update_result_t{
- result.refcount,
- removed->first,
- removed->second};
+ return std::make_pair(
+ ref_update_result_t{
+ result.refcount,
+ removed->first,
+ removed->second},
+ std::move(mapping));
} else {
- return ref_update_result_t{
- result.refcount,
- result.pladdr,
- result.len
- };
+ return std::make_pair(
+ ref_update_result_t{
+ result.refcount,
+ result.pladdr,
+ result.len},
+ std::move(mapping));
}
});
});
LogicalCachedExtent* nextent)
{
auto c = get_context(t);
- return with_btree_ret<LBABtree, lba_map_val_t>(
+ return with_btree_ret<LBABtree, _update_mapping_ret_bare>(
cache,
c,
[f=std::move(f), c, addr, nextent](auto &btree) mutable {
c,
iter
).si_then([ret] {
- return ret;
+ return std::make_pair(
+ std::move(ret), BtreeLBAMappingRef(nullptr));
});
} else {
return btree.update(
iter,
ret,
nextent
- ).si_then([ret](auto) {
- return ret;
+ ).si_then([c, ret](auto iter) {
+ return std::make_pair(
+ std::move(ret), iter.get_pin(c));
});
}
});
// 3. intermediate_base: the laddr key of the physical lba mapping, intermediate_key
// and intermediate_base should be the same when doing cloning
// 4. intermediate_offset: intermediate_key - intermediate_base
-// 5. paddr: the paddr recorded in the physical lba mapping pointed to by the
+// 5. intermediate_length: the length of the actual physical lba mapping
+// 6. paddr: the paddr recorded in the physical lba mapping pointed to by the
// indirect lba mapping being queried;
//
// NOTE THAT, for direct BtreeLBAMappings, their intermediate_keys are the same as
val.len,
meta),
key(meta.begin),
- indirect(val.pladdr.is_laddr() ? true : false),
+ indirect(val.pladdr.is_laddr()),
intermediate_key(indirect ? val.pladdr.get_laddr() : L_ADDR_NULL),
intermediate_length(indirect ? val.len : 0),
raw_val(val.pladdr),
return indirect;
}
- void set_key_for_indirect(
+ void make_indirect(
laddr_t new_key,
extent_len_t length,
laddr_t interkey = L_ADDR_NULL)
{
- turn_indirect(interkey);
+ assert(!indirect);
+ assert(value.is_paddr());
+ intermediate_base = key;
+ intermediate_key = (interkey == L_ADDR_NULL ? key : interkey);
+ indirect = true;
key = new_key;
intermediate_length = len;
len = length;
return raw_val;
}
- void set_paddr(paddr_t addr) {
- value = addr;
- }
-
laddr_t get_intermediate_key() const final {
assert(is_indirect());
assert(intermediate_key != L_ADDR_NULL);
return intermediate_length;
}
- void set_intermediate_base(laddr_t base) {
- intermediate_base = base;
- }
-
protected:
std::unique_ptr<BtreeNodeMapping<laddr_t, paddr_t>> _duplicate(
op_context_t<laddr_t> ctx) const final {
return pin;
}
private:
- void turn_indirect(laddr_t interkey) {
- assert(value.is_paddr());
- intermediate_base = key;
- intermediate_key = (interkey == L_ADDR_NULL ? key : interkey);
- indirect = true;
- }
laddr_t key = L_ADDR_NULL;
bool indirect = false;
laddr_t intermediate_key = L_ADDR_NULL;
len,
P_ADDR_ZERO,
P_ADDR_NULL,
- L_ADDR_NULL,
nullptr);
}
paddr_t actual_addr,
laddr_t intermediate_base)
{
+ assert(intermediate_key != L_ADDR_NULL);
+ assert(intermediate_base != L_ADDR_NULL);
return _alloc_extent(
t,
hint,
len,
intermediate_key,
actual_addr,
- intermediate_base,
- nullptr);
+ nullptr
+ ).si_then([&t, this, intermediate_base](auto indirect_mapping) {
+ assert(indirect_mapping->is_indirect());
+ return update_refcount(t, intermediate_base, 1, false
+ ).si_then([imapping=std::move(indirect_mapping)](auto p) mutable {
+ auto mapping = std::move(p.second);
+ ceph_assert(mapping->is_stable());
+ mapping->make_indirect(
+ imapping->get_key(),
+ imapping->get_length(),
+ imapping->get_intermediate_key());
+ return seastar::make_ready_future<
+ LBAMappingRef>(std::move(mapping));
+ });
+ }).handle_error_interruptible(
+ crimson::ct_error::input_output_error::pass_further{},
+ crimson::ct_error::assert_all{"unexpect enoent"}
+ );
}
alloc_extent_ret alloc_extent(
len,
addr,
P_ADDR_NULL,
- L_ADDR_NULL,
&ext);
}
Transaction &t,
laddr_t addr,
bool cascade_remove) final {
- return update_refcount(t, addr, -1, cascade_remove);
+ return update_refcount(t, addr, -1, cascade_remove
+ ).si_then([](auto p) {
+ return std::move(p.first);
+ });
}
// Raises the refcount of the mapping at addr by one through update_refcount.
ref_ret incref_extent(
Transaction &t,
laddr_t addr) final {
- return update_refcount(t, addr, 1, false);
+ return update_refcount(t, addr, 1, false
+ ).si_then([](auto p) { // p: the pair produced by update_refcount
+ return std::move(p.first); // hand back only the first element of the pair
+ });
}
ref_ret incref_extent(
laddr_t addr,
int delta) final {
ceph_assert(delta > 0);
- return update_refcount(t, addr, delta, false);
+ return update_refcount(t, addr, delta, false
+ ).si_then([](auto p) {
+ return std::move(p.first);
+ });
}
/**
*
* Updates refcount; returns the resulting refcount paired with a
* BtreeLBAMappingRef for the updated mapping (the ref may be null)
*/
- using update_refcount_ret = ref_ret;
+ using update_refcount_ret_bare = std::pair<ref_update_result_t, BtreeLBAMappingRef>;
+ using update_refcount_iertr = ref_iertr;
+ using update_refcount_ret = update_refcount_iertr::future<
+ update_refcount_ret_bare>;
update_refcount_ret update_refcount(
Transaction &t,
laddr_t addr,
* Updates mapping, removes if f returns nullopt
*/
using _update_mapping_iertr = ref_iertr;
- using _update_mapping_ret = ref_iertr::future<lba_map_val_t>;
+ using _update_mapping_ret_bare = std::pair<lba_map_val_t, BtreeLBAMappingRef>;
+ using _update_mapping_ret = ref_iertr::future<_update_mapping_ret_bare>;
using update_func_t = std::function<
lba_map_val_t(const lba_map_val_t &v)
>;
extent_len_t len,
pladdr_t addr,
paddr_t actual_addr,
- laddr_t intermediate_base,
LogicalCachedExtent*);
using _get_mapping_ret = get_mapping_iertr::future<BtreeLBAMappingRef>;
DEBUGT("decreasing ref: {}",
ctx.t,
pin->get_key());
- return ctx.tm.dec_ref(
+ return ctx.tm.remove(
ctx.t,
pin->get_key()
).si_then(
object_data.get_reserved_data_base(),
object_data.get_reserved_data_len(),
data_base);
- return ctx.tm.dec_ref(
+ return ctx.tm.remove(
ctx.t,
object_data.get_reserved_data_base()
).si_then(
omap_root.hint);
oc.t.get_omap_tree_stats().depth = omap_root.depth;
oc.t.get_omap_tree_stats().extents_num_delta--;
- return oc.tm.dec_ref(oc.t, root->get_laddr()
+ return oc.tm.remove(oc.t, root->get_laddr()
).si_then([](auto &&ret) -> handle_root_merge_ret {
return seastar::now();
}).handle_error_interruptible(
).si_then([this, &t, &omap_root](auto extent) {
return extent->clear(get_omap_context(t, omap_root.hint));
}).si_then([this, &omap_root, &t] {
- return tm.dec_ref(
+ return tm.remove(
t, omap_root.get_location()
).si_then([&omap_root] (auto ret) {
omap_root.update(
using dec_ref_ret = dec_ref_iertr::future<>;
template <typename T>
dec_ref_ret dec_ref(omap_context_t oc, T&& addr) {
- return oc.tm.dec_ref(oc.t, std::forward<T>(addr)).handle_error_interruptible(
+ return oc.tm.remove(oc.t, std::forward<T>(addr)).handle_error_interruptible(
dec_ref_iertr::pass_further{},
crimson::ct_error::assert_all{
"Invalid error in OMapInnerNode helper dec_ref"
return retire_iertr::now();
}
}
- return tm.dec_ref(t, extent).si_then([addr, len, &t] (unsigned cnt) {
+ return tm.remove(t, extent).si_then([addr, len, &t] (unsigned cnt) {
assert(cnt == 0);
SUBTRACET(seastore_onode, "retired {}B at {:#x} ...", t, len, addr);
});
});
}
-TransactionManager::ref_ret TransactionManager::dec_ref(
+TransactionManager::ref_ret TransactionManager::remove(
Transaction &t,
LogicalCachedExtentRef &ref)
{
- LOG_PREFIX(TransactionManager::dec_ref);
+ LOG_PREFIX(TransactionManager::remove);
TRACET("{}", t, *ref);
return lba_manager->decref_extent(t, ref->get_laddr(), true
).si_then([this, FNAME, &t, ref](auto result) {
});
}
-TransactionManager::refs_ret TransactionManager::dec_ref(
+TransactionManager::refs_ret TransactionManager::remove(
Transaction &t,
std::vector<laddr_t> offsets)
{
- LOG_PREFIX(TransactionManager::dec_ref);
+ LOG_PREFIX(TransactionManager::remove);
DEBUG("{} offsets", offsets.size());
return seastar::do_with(std::move(offsets), std::vector<unsigned>(),
[this, &t] (auto &&offsets, auto &refcnt) {
return trans_intr::do_for_each(offsets.begin(), offsets.end(),
[this, &t, &refcnt] (auto &laddr) {
- return this->dec_ref(t, laddr).si_then([&refcnt] (auto ref) {
+ return this->remove(t, laddr).si_then([&refcnt] (auto ref) {
refcnt.push_back(ref);
return ref_iertr::now();
});
using ref_iertr = LBAManager::ref_iertr;
using ref_ret = ref_iertr::future<unsigned>;
+#ifdef UNIT_TESTS_BUILT
/// Add refcount for ref
ref_ret inc_ref(
Transaction &t,
ref_ret inc_ref(
Transaction &t,
laddr_t offset);
+#endif
- /// Remove refcount for ref
- ref_ret dec_ref(
+ /**
+ * remove
+ *
+ * Remove the extent and the corresponding lba mapping;
+ * users must make sure that the lba mapping's refcount is 1
+ */
+ ref_ret remove(
Transaction &t,
LogicalCachedExtentRef &ref);
- /// Remove refcount for offset
- ref_ret dec_ref(
+ /**
+ * remove
+ *
+ * 1. Remove the indirect mapping(s), and if refcount drops to 0,
+ * also remove the direct mapping and retire the extent.
+ *
+ * 2. Remove the direct mapping(s) and retire the extent if
+ * refcount drops to 0.
+ */
+ ref_ret remove(
Transaction &t,
laddr_t offset) {
return _dec_ref(t, offset, true);
/// remove the mappings for a list of offsets, returning the resulting refcount of each
using refs_ret = ref_iertr::future<std::vector<unsigned>>;
- refs_ret dec_ref(
+ refs_ret remove(
Transaction &t,
std::vector<laddr_t> offsets);
mapping.is_indirect()
? mapping.get_intermediate_key()
: mapping.get_key();
- auto intermediate_base =
- mapping.is_indirect()
- ? mapping.get_intermediate_base()
- : mapping.get_key();
LOG_PREFIX(TransactionManager::clone_pin);
SUBDEBUGT(seastore_tm, "len={}, laddr_hint={}, clone_offset {}",
intermediate_key,
mapping.get_val(),
intermediate_key
- ).si_then([this, &t, intermediate_base](auto pin) {
- return inc_ref(t, intermediate_base
- ).si_then([pin=std::move(pin)](auto) mutable {
- return std::move(pin);
- }).handle_error_interruptible(
- crimson::ct_error::input_output_error::pass_further(),
- crimson::ct_error::assert_all("not possible")
- );
- });
+ );
}
/* alloc_extents
ceph_assert(test_mappings.get(offset, t.mapping_delta).refcount > 0);
auto refcnt = with_trans_intr(*(t.t), [&](auto& trans) {
- return tm->dec_ref(trans, offset);
+ return tm->remove(trans, offset);
}).unsafe_get0();
auto check_refcnt = test_mappings.dec_ref(offset, t.mapping_delta);
EXPECT_EQ(refcnt, check_refcnt);