std::vector<CachedExtentRef> &extents)
{
return seastar::do_with(
- JOURNAL_SEQ_NULL,
std::move(backrefs),
- [this, &t, &extents](auto &seq, auto &backrefs) {
+ [this, &t, &extents](auto &backrefs) {
return trans_intr::parallel_for_each(
backrefs,
- [this, &extents, &t, &seq](auto &ent) {
+ [this, &extents, &t](auto &ent) {
LOG_PREFIX(AsyncCleaner::_retrieve_live_extents);
DEBUGT("getting extent of type {} at {}~{}",
t,
ent.len);
return ecb->get_extents_if_live(
t, ent.type, ent.paddr, ent.laddr, ent.len
- ).si_then([this, FNAME, &extents, &ent, &seq, &t](auto list) {
+ ).si_then([&extents, &ent, &t](auto list) {
+ LOG_PREFIX(AsyncCleaner::_retrieve_live_extents);
if (list.empty()) {
DEBUGT("addr {} dead, skipping", t, ent.paddr);
- auto backref = backref_manager.get_cached_backref_removal(ent.paddr);
- if (seq == JOURNAL_SEQ_NULL || seq < backref.seq) {
- seq = backref.seq;
- }
} else {
for (auto &e : list) {
extents.emplace_back(std::move(e));
}
return ExtentCallbackInterface::rewrite_extent_iertr::now();
});
- }).si_then([&seq] {
- return retrieve_live_extents_iertr::make_ready_future<
- journal_seq_t>(std::move(seq));
});
});
}
reclaimed = 0;
runs++;
return seastar::do_with(
- backref_manager.get_cached_backref_extents_in_range(
- reclaim_state->start_pos, reclaim_state->end_pos),
- backref_manager.get_cached_backrefs_in_range(
- reclaim_state->start_pos, reclaim_state->end_pos),
- backref_manager.get_cached_backref_removals_in_range(
- reclaim_state->start_pos, reclaim_state->end_pos),
JOURNAL_SEQ_NULL,
[this, &reclaimed, &pin_list](
- auto &backref_extents,
- auto &backrefs,
- auto &del_backrefs,
auto &seq) {
return ecb->with_transaction_intr(
Transaction::src_t::CLEANER_RECLAIM,
"reclaim_space",
- [this, &backref_extents, &backrefs, &seq,
- &del_backrefs, &reclaimed, &pin_list](auto &t) {
- LOG_PREFIX(AsyncCleaner::gc_reclaim_space);
- DEBUGT("{} backrefs, {} del_backrefs, {} pins", t,
- backrefs.size(), del_backrefs.size(), pin_list.size());
- for (auto &br : backrefs) {
- if (seq == JOURNAL_SEQ_NULL
- || (br.seq != JOURNAL_SEQ_NULL && br.seq > seq))
- seq = br.seq;
- }
- for (auto &pin : pin_list) {
- backrefs.emplace(
- pin->get_key(),
- pin->get_val(),
- pin->get_length(),
- pin->get_type(),
- journal_seq_t());
- }
- for (auto &del_backref : del_backrefs) {
- DEBUGT("del_backref {}~{} {} {}", t,
- del_backref.paddr, del_backref.len, del_backref.type, del_backref.seq);
- auto it = backrefs.find(del_backref.paddr);
- if (it != backrefs.end() &&
- it->len == del_backref.len)
- backrefs.erase(it);
- if (seq == JOURNAL_SEQ_NULL
- || (del_backref.seq != JOURNAL_SEQ_NULL && del_backref.seq > seq))
- seq = del_backref.seq;
- }
+ [this, &seq, &reclaimed, &pin_list](auto &t) {
return seastar::do_with(
std::vector<CachedExtentRef>(),
- [this, &backref_extents, &backrefs, &reclaimed, &t, &seq]
+ [this, &reclaimed, &t, &seq, &pin_list]
(auto &extents) {
return backref_manager.retrieve_backref_extents(
- t, std::move(backref_extents), extents
- ).si_then([this, &extents, &t, &backrefs] {
+ t,
+ backref_manager.get_cached_backref_extents_in_range(
+ reclaim_state->start_pos, reclaim_state->end_pos),
+ extents
+ ).si_then([this, &extents, &t, &pin_list] {
+ // calculate live extents
+ auto backref_set =
+ backref_manager.get_cached_backrefs_in_range(
+ reclaim_state->start_pos, reclaim_state->end_pos);
+ std::set<
+ backref_buf_entry_t,
+ backref_buf_entry_t::cmp_t> backrefs;
+ for (auto &pin : pin_list) {
+ backrefs.emplace(pin->get_key(), pin->get_val(),
+ pin->get_length(), pin->get_type(), journal_seq_t());
+ }
+ // Replay the cached backref records on top of the pins: a record whose
+ // laddr is L_ADDR_NULL removes the entry at that paddr, any other
+ // record adds one.
+ for (auto &backref : backref_set) {
+   if (backref.laddr == L_ADDR_NULL) {
+     auto it = backrefs.find(backref.paddr);
+     // The removal is expected to match an entry added from a pin or
+     // from an earlier cached insertion. Check the iterator before it
+     // is dereferenced and erased, so that a violated expectation
+     // aborts cleanly (also in NDEBUG builds) instead of touching end().
+     ceph_assert(it != backrefs.end());
+     assert(it->len == backref.len);
+     backrefs.erase(it);
+   } else {
+     backrefs.emplace(backref.paddr, backref.laddr,
+       backref.len, backref.type, backref.seq);
+   }
+ }
+ // retrieve live extents
return _retrieve_live_extents(
t, std::move(backrefs), extents);
- }).si_then([this, &seq, &t](auto nseq) {
- if (nseq != JOURNAL_SEQ_NULL &&
- (nseq > seq || seq == JOURNAL_SEQ_NULL))
- seq = nseq;
+ }).si_then([this, &t, &seq] {
+ // we need to get the backref_set in range again, because
+ // it can change during live extents retrieval
+ auto backref_set =
+ backref_manager.get_cached_backrefs_in_range(
+ reclaim_state->start_pos, reclaim_state->end_pos);
+ // calculate the journal seq up to which the backref merge
+ // should run
+ for (auto &backref : backref_set) {
+ if (backref.seq != JOURNAL_SEQ_NULL &&
+ (backref.seq > seq || seq == JOURNAL_SEQ_NULL)) {
+ seq = backref.seq;
+ }
+ }
auto fut = BackrefManager::merge_cached_backrefs_iertr::now();
if (seq != JOURNAL_SEQ_NULL) {
fut = backref_manager.merge_cached_backrefs(
t.mark_segment_to_release(reclaim_state->get_segment_id());
}
return ecb->submit_transaction_direct(
- t, std::make_optional<journal_seq_t>(std::move(seq)));
+ t, std::make_optional<journal_seq_t>(std::move(seq)),
+ std::make_optional<std::pair<paddr_t, paddr_t>>(
+ {reclaim_state->start_pos, reclaim_state->end_pos}));
});
});
});
}).safe_then(
[&reclaimed, this, pavail_ratio, start, &runs] {
LOG_PREFIX(AsyncCleaner::gc_reclaim_space);
-#ifndef NDEBUG
- auto ndel_backrefs =
- backref_manager.get_cached_backref_removals_in_range(
- reclaim_state->start_pos, reclaim_state->end_pos);
- if (!ndel_backrefs.empty()) {
- for (auto &del_br : ndel_backrefs) {
- ERROR("unexpected del_backref {}~{} {} {}",
- del_br.paddr, del_br.len, del_br.type, del_br.seq);
- }
- ceph_abort("impossible");
- }
-#endif
stats.reclaiming_bytes += reclaimed;
auto d = seastar::lowres_system_clock::now() - start;
DEBUG("duration: {}, pavail_ratio before: {}, repeats: {}", d, pavail_ratio, runs);
return sm_group->release_segment(to_release
).safe_then([this, FNAME, &t, to_release] {
auto old_usage = calc_utilization(to_release);
- if(old_usage != 0) {
+ if(unlikely(old_usage != 0)) {
+ space_tracker->dump_usage(to_release);
ERRORT("segment {} old_usage {} != 0", t, to_release, old_usage);
ceph_abort();
}
void AsyncCleaner::mark_space_free(
paddr_t addr,
- extent_len_t len)
+ extent_len_t len,
+ bool init_scan)
{
LOG_PREFIX(AsyncCleaner::mark_space_free);
- if (!init_complete) {
+ if (!init_complete && !init_scan) {
return;
}
if (addr.get_addr_type() != addr_types_t::SEGMENT) {
submit_transaction_direct_iertr::future<>;
virtual submit_transaction_direct_ret submit_transaction_direct(
Transaction &t,
- std::optional<journal_seq_t> seq_to_trim = std::nullopt) = 0;
+ std::optional<journal_seq_t> seq_to_trim = std::nullopt,
+ std::optional<std::pair<paddr_t, paddr_t>> gc_range = std::nullopt) = 0;
};
private:
void mark_space_free(
paddr_t addr,
- extent_len_t len);
+ extent_len_t len,
+ bool init_scan = false);
SpaceTrackerIRef get_empty_space_tracker() const {
return space_tracker->make_empty();
using retrieve_live_extents_iertr = work_iertr;
using retrieve_live_extents_ret =
- retrieve_live_extents_iertr::future<journal_seq_t>;
+ retrieve_live_extents_iertr::future<>;
retrieve_live_extents_ret _retrieve_live_extents(
Transaction &t,
std::set<
return cache.get_backrefs_in_range(start, end);
}
-Cache::backref_buf_entry_query_set_t
-BtreeBackrefManager::get_cached_backref_removals_in_range(
- paddr_t start,
- paddr_t end)
-{
- return cache.get_del_backrefs_in_range(start, end);
-}
-
-const backref_buf_entry_t::set_t&
-BtreeBackrefManager::get_cached_backref_removals()
-{
- return cache.get_del_backrefs();
-}
-
-const backref_buf_entry_t::set_t&
+const backref_set_t& // forwards the reference returned by cache.get_backrefs(); no copy is made here
BtreeBackrefManager::get_cached_backrefs()
{
return cache.get_backrefs();
}
-backref_buf_entry_t
-BtreeBackrefManager::get_cached_backref_removal(paddr_t addr)
-{
- return cache.get_del_backref(addr);
-}
-
Cache::backref_extent_buf_entry_query_set_t
BtreeBackrefManager::get_cached_backref_extents_in_range(
paddr_t start,
});
}
-bool BtreeBackrefManager::backref_should_be_removed(paddr_t paddr) {
- return cache.backref_should_be_removed(paddr);
-}
-
} // namespace crimson::os::seastore::backref
get_cached_backrefs_in_range(
paddr_t start,
paddr_t end) final;
-
- Cache::backref_buf_entry_query_set_t
- get_cached_backref_removals_in_range(
- paddr_t start,
- paddr_t end) final;
-
- const backref_buf_entry_t::set_t& get_cached_backref_removals() final;
- const backref_buf_entry_t::set_t& get_cached_backrefs() final;
- backref_buf_entry_t get_cached_backref_removal(paddr_t addr) final;
+ const backref_set_t& get_cached_backrefs() final;
Cache::backref_extent_buf_entry_query_set_t
get_cached_backref_extents_in_range(
void cache_new_backref_extent(paddr_t paddr, extent_types_t type) final;
- bool backref_should_be_removed(paddr_t paddr) final;
-
private:
SegmentManagerGroup &sm_group;
Cache &cache;
get_cached_backrefs_in_range(
paddr_t start,
paddr_t end) = 0;
-
- virtual Cache::backref_buf_entry_query_set_t
- get_cached_backref_removals_in_range(
- paddr_t start,
- paddr_t end) = 0;
-
- virtual const backref_buf_entry_t::set_t& get_cached_backref_removals() = 0;
- virtual const backref_buf_entry_t::set_t& get_cached_backrefs() = 0;
- virtual backref_buf_entry_t get_cached_backref_removal(paddr_t addr) = 0;
+ virtual const backref_set_t& get_cached_backrefs() = 0;
virtual Cache::backref_extent_buf_entry_query_set_t
get_cached_backref_extents_in_range(
paddr_t start,
paddr_t end) = 0;
- virtual bool backref_should_be_removed(paddr_t paddr) = 0;
-
using retrieve_backref_extents_iertr = trans_iertr<
crimson::errorator<
crimson::ct_error::input_output_error>
namespace crimson::os::seastore {
+// Stream a backref_buf_entry_t as
+// "backref_buf_entry_t{<paddr>~<len>, laddr: <laddr>, type: <type>, seq: <seq>}"
+// and return the stream so calls can be chained.
+std::ostream &operator<<(std::ostream &out, const backref_buf_entry_t &ent) {
+  return out << "backref_buf_entry_t{"
+             << ent.paddr << "~" << ent.len << ", "
+             << "laddr: " << ent.laddr << ", "
+             << "type: " << ent.type << ", "
+             // seq is the last field: no separator before the closing brace
+             << "seq: " << ent.seq
+             << "}";
+}
+
Cache::Cache(
ExtentPlacementManager &epm)
: epm(epm),
if (!backref_buffer) {
backref_buffer = std::make_unique<backref_cache_t>();
}
- // backref_buf_entry_t::laddr == L_ADDR_NULL means erase
+
for (auto &ent : list) {
- if (ent->laddr == L_ADDR_NULL) {
- auto insert_set_iter = backref_inserted_set.find(
- ent->paddr, backref_buf_entry_t::cmp_t());
- if (insert_set_iter == backref_inserted_set.end()) {
- // backref to be removed isn't in the backref buffer,
- // it must be in the backref tree.
- auto [it, insert] = backref_remove_set.insert(*ent);
- boost::ignore_unused(insert);
-#ifndef NDEBUG
- if (!insert) {
- ERROR("backref_remove_set already contains {}", ent->paddr);
- }
-#endif
- assert(insert);
- } else {
- // the backref insertion hasn't been applied to the
- // backref tree
- auto seq = insert_set_iter->seq;
- auto it = backref_buffer->backrefs_by_seq.find(seq);
- ceph_assert(it != backref_buffer->backrefs_by_seq.end());
- auto &backref_buf = it->second;
- assert(insert_set_iter->backref_buf_hook.is_linked());
- backref_buf.br_list.erase(
- backref_buf_entry_t::list_t::s_iterator_to(*insert_set_iter));
- backref_inserted_set.erase(insert_set_iter);
- }
- } else {
- auto [it, insert] = backref_inserted_set.insert(*ent);
- boost::ignore_unused(insert);
- assert(insert);
- }
+ backref_set.insert(*ent);
}
auto iter = backref_buffer->backrefs_by_seq.find(seq);
const backref_buf_entry_t &r) {
return l.paddr == r.paddr;
}
+
using set_hook_t =
boost::intrusive::set_member_hook<
boost::intrusive::link_mode<
backref_buf_entry_t,
set_hook_t,
&backref_buf_entry_t::backref_set_hook>;
- using set_t = boost::intrusive::set<
+ using set_t = boost::intrusive::multiset<
backref_buf_entry_t,
backref_set_member_options,
boost::intrusive::constant_time_size<false>>;
};
};
+std::ostream &operator<<(std::ostream &out, const backref_buf_entry_t &ent);
+
using backref_buf_entry_ref =
std::unique_ptr<backref_buf_entry_t>;
+using backref_set_t = backref_buf_entry_t::set_t;
+
struct backref_buf_t {
backref_buf_t(std::vector<backref_buf_entry_ref> &&refs) : backrefs(std::move(refs)) {
for (auto &ref : backrefs) {
}
backref_cache_ref backref_buffer;
- // backrefs that needs to be inserted into the backref tree
- backref_buf_entry_t::set_t backref_inserted_set;
- backref_buf_entry_t::set_t backref_remove_set; // backrefs needs to be removed
- // from the backref tree
+ backref_set_t backref_set; // in cache backrefs indexed by paddr_t
using backref_buf_entry_query_set_t =
- std::set<
+ std::multiset<
backref_buf_entry_t,
backref_buf_entry_t::cmp_t>;
- backref_buf_entry_query_set_t get_backrefs_in_range(
- paddr_t start,
- paddr_t end) {
- auto start_iter = backref_inserted_set.lower_bound(
- start,
- backref_buf_entry_t::cmp_t());
- auto end_iter = backref_inserted_set.lower_bound(
- end,
- backref_buf_entry_t::cmp_t());
- std::set<
- backref_buf_entry_t,
- backref_buf_entry_t::cmp_t> res;
- for (auto it = start_iter;
- it != end_iter;
- it++) {
- res.emplace(it->paddr, it->laddr, it->len, it->type, it->seq);
- }
- return res;
- }
- backref_buf_entry_query_set_t get_del_backrefs_in_range(
+ backref_buf_entry_query_set_t get_backrefs_in_range( // returns copies of the backref_set entries found between the lower bounds of start and end
paddr_t start,
paddr_t end) {
- LOG_PREFIX(Cache::get_del_backrefs_in_range);
- SUBDEBUG(seastore_cache, "total {} del_backrefs", backref_remove_set.size());
- auto start_iter = backref_remove_set.lower_bound(
+ auto start_iter = backref_set.lower_bound( // first entry not ordered before start under cmp_t
start,
backref_buf_entry_t::cmp_t());
- auto end_iter = backref_remove_set.lower_bound(
+ auto end_iter = backref_set.lower_bound( // first entry not ordered before end; iteration stops here, so the range is half-open
end,
backref_buf_entry_t::cmp_t());
- std::set<
- backref_buf_entry_t,
- backref_buf_entry_t::cmp_t> res;
+ backref_buf_entry_query_set_t res; // a std::multiset: entries that compare equal under cmp_t are all kept
for (auto it = start_iter;
it != end_iter;
it++) {
res.emplace(it->paddr, it->laddr, it->len, it->type, it->seq);
}
- SUBDEBUG(seastore_cache, "{} del_backrefs in range", res.size());
return res;
}
- backref_buf_entry_t get_del_backref(
- paddr_t addr) {
- auto it = backref_remove_set.find(addr, backref_buf_entry_t::cmp_t());
- assert(it != backref_remove_set.end());
- return *it;
- }
-
- bool backref_should_be_removed(paddr_t addr) {
- return backref_remove_set.find(
- addr, backref_buf_entry_t::cmp_t()) != backref_remove_set.end();
- }
-
- const backref_buf_entry_t::set_t& get_backrefs() {
- return backref_inserted_set;
- }
-
- const backref_buf_entry_t::set_t& get_del_backrefs() {
- return backref_remove_set;
+ const backref_set_t& get_backrefs() { // read-only access to the backref_set member; returns a reference, not a copy
+ return backref_set;
}
backref_cache_ref& get_backref_buffer() {
std::vector<OnodeRef> &d_onodes,
ceph::os::Transaction::iterator &i)
{
- LOG_PREFIX(SeaStore::_do_transaction_step);
auto op = i.decode_op();
using ceph::os::Transaction;
}
}
return fut.si_then([&, op, this](auto&& get_onode) -> tm_ret {
+ LOG_PREFIX(SeaStore::_do_transaction_step);
OnodeRef &o = onodes[op->oid];
if (!o) {
assert(get_onode);
t,
addr,
len);
- if (addr.is_real() &&
- !backref_manager->backref_should_be_removed(addr)) {
- async_cleaner->mark_space_used(
- addr,
- len ,
- /* init_scan = */ true);
- }
+ async_cleaner->mark_space_used(
+ addr,
+ len ,
+ /* init_scan = */ true);
if (is_backref_node(type)) {
ceph_assert(depth);
backref_manager->cache_new_backref_extent(addr, type);
cache->update_tree_extents_num(type, 1);
return seastar::now();
}
- }).si_then([this] {
+ }).si_then([this, &t] {
LOG_PREFIX(TransactionManager::mount);
auto &backrefs = backref_manager->get_cached_backrefs();
- DEBUG("marking {} backrefs used", backrefs.size());
+ DEBUGT("scan backref cache", t);
for (auto &backref : backrefs) {
- async_cleaner->mark_space_used(
- backref.paddr,
- backref.len,
- true);
- cache->update_tree_extents_num(backref.type, 1);
+ if (backref.laddr == L_ADDR_NULL) {
+ async_cleaner->mark_space_free(
+ backref.paddr,
+ backref.len,
+ true);
+ cache->update_tree_extents_num(backref.type, -1);
+ } else {
+ async_cleaner->mark_space_used(
+ backref.paddr,
+ backref.len,
+ true);
+ cache->update_tree_extents_num(backref.type, 1);
+ }
}
return seastar::now();
});
TransactionManager::submit_transaction_direct_ret
TransactionManager::submit_transaction_direct(
Transaction &tref,
- std::optional<journal_seq_t> seq_to_trim)
+ std::optional<journal_seq_t> seq_to_trim,
+ std::optional<std::pair<paddr_t, paddr_t>> gc_range)
{
LOG_PREFIX(TransactionManager::submit_transaction_direct);
SUBTRACET(seastore_t, "start", tref);
}).si_then([this, FNAME, &tref] {
SUBTRACET(seastore_t, "about to prepare", tref);
return tref.get_handle().enter(write_pipeline.prepare);
- }).si_then([this, FNAME, &tref, seq_to_trim=std::move(seq_to_trim)]() mutable
+ }).si_then([this, FNAME, &tref, seq_to_trim=std::move(seq_to_trim),
+ gc_range=std::move(gc_range)]() mutable
-> submit_transaction_iertr::future<> {
if (seq_to_trim && *seq_to_trim != JOURNAL_SEQ_NULL) {
cache->trim_backref_bufs(*seq_to_trim);
}
+
+#ifndef NDEBUG
+ // Debug-only check, active when the caller passed a gc_range: any
+ // backref the cache still reports inside that range at this point is
+ // treated as a bug.
+ if (gc_range) {
+   auto backref_set =
+     backref_manager->get_cached_backrefs_in_range(
+       gc_range->first, gc_range->second);
+   // Log every offending entry first; aborting inside the loop would
+   // hide all but the first one.
+   for (auto &backref : backref_set) {
+     ERRORT("unexpected backref: {}~{}, {}, {}, {}",
+       tref, backref.paddr, backref.len, backref.laddr,
+       backref.type, backref.seq);
+   }
+   if (!backref_set.empty()) {
+     ceph_abort("impossible");
+   }
+ }
+#endif
auto record = cache->prepare_record(tref, async_cleaner.get());
tref.get_handle().maybe_release_collection_lock();
using AsyncCleaner::ExtentCallbackInterface::submit_transaction_direct_ret;
submit_transaction_direct_ret submit_transaction_direct(
Transaction &t,
- std::optional<journal_seq_t> seq_to_trim = std::nullopt) final;
+ std::optional<journal_seq_t> seq_to_trim = std::nullopt,
+ std::optional<std::pair<paddr_t, paddr_t>> gc_range = std::nullopt) final;
/**
* flush
[this, &tracker](auto &t) {
return backref_manager->scan_mapped_space(
t,
- [&tracker, this](auto offset, auto len, depth_t, extent_types_t) {
- if (offset.get_addr_type() == addr_types_t::SEGMENT &&
- !backref_manager->backref_should_be_removed(offset)) {
+ [&tracker](auto offset, auto len, depth_t, extent_types_t) {
+ if (offset.get_addr_type() == addr_types_t::SEGMENT) {
logger().debug("check_usage: tracker alloc {}~{}",
offset, len);
tracker->allocate(
auto &backrefs = backref_manager->get_cached_backrefs();
for (auto &backref : backrefs) {
if (backref.paddr.get_addr_type() == addr_types_t::SEGMENT) {
- logger().debug("check_usage: by backref, tracker alloc {}~{}",
- backref.paddr, backref.len);
- tracker->allocate(
- backref.paddr.as_seg_paddr().get_segment_id(),
- backref.paddr.as_seg_paddr().get_segment_off(),
- backref.len);
+ if (backref.laddr == L_ADDR_NULL) {
+ tracker->release(
+ backref.paddr.as_seg_paddr().get_segment_id(),
+ backref.paddr.as_seg_paddr().get_segment_off(),
+ backref.len);
+ } else {
+ tracker->allocate(
+ backref.paddr.as_seg_paddr().get_segment_id(),
+ backref.paddr.as_seg_paddr().get_segment_off(),
+ backref.len);
+ }
}
}
return seastar::now();