return NULL_SEG_ID;
}
-void AsyncCleaner::update_journal_tail_target(
- journal_seq_t dirty_replay_from,
- journal_seq_t alloc_replay_from)
+void AsyncCleaner::update_journal_tails(
+ journal_seq_t dirty_tail,
+ journal_seq_t alloc_tail)
{
- LOG_PREFIX(AsyncCleaner::update_journal_tail_target);
+ LOG_PREFIX(AsyncCleaner::update_journal_tails);
if (disable_trim) return;
- assert(dirty_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
- assert(alloc_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
- if (dirty_extents_replay_from == JOURNAL_SEQ_NULL
- || dirty_replay_from > dirty_extents_replay_from) {
- DEBUG("dirty_extents_replay_from={} => {}",
- dirty_extents_replay_from, dirty_replay_from);
- dirty_extents_replay_from = dirty_replay_from;
- }
-
- update_alloc_info_replay_from(alloc_replay_from);
+ assert(dirty_tail.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
+ assert(alloc_tail.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
- journal_seq_t target = std::min(dirty_replay_from, alloc_replay_from);
- ceph_assert(target != JOURNAL_SEQ_NULL);
+ ceph_assert(dirty_tail != JOURNAL_SEQ_NULL);
+ ceph_assert(alloc_tail != JOURNAL_SEQ_NULL);
ceph_assert(journal_head == JOURNAL_SEQ_NULL ||
- journal_head >= target);
- if (journal_tail_target == JOURNAL_SEQ_NULL ||
- target > journal_tail_target) {
- if (!init_complete ||
- journal_tail_target.segment_seq == target.segment_seq) {
- DEBUG("journal_tail_target={} => {}", journal_tail_target, target);
+ (journal_head >= dirty_tail && journal_head >= alloc_tail));
+
+ if (journal_dirty_tail == JOURNAL_SEQ_NULL ||
+ dirty_tail > journal_dirty_tail) {
+ if (journal_dirty_tail.segment_seq == dirty_tail.segment_seq) {
+ DEBUG("journal_dirty_tail {} => {}", journal_dirty_tail, dirty_tail);
} else {
- INFO("journal_tail_target={} => {}", journal_tail_target, target);
+ INFO("journal_dirty_tail {} => {}", journal_dirty_tail, dirty_tail);
}
- journal_tail_target = target;
+ journal_dirty_tail = dirty_tail;
}
- gc_process.maybe_wake_on_space_used();
- maybe_wake_gc_blocked_io();
-}
-void AsyncCleaner::update_alloc_info_replay_from(
- journal_seq_t alloc_replay_from)
-{
- LOG_PREFIX(AsyncCleaner::update_alloc_info_replay_from);
- if (alloc_info_replay_from == JOURNAL_SEQ_NULL
- || alloc_replay_from > alloc_info_replay_from) {
- DEBUG("alloc_info_replay_from={} => {}",
- alloc_info_replay_from, alloc_replay_from);
- alloc_info_replay_from = alloc_replay_from;
- }
-}
-
-void AsyncCleaner::update_journal_tail_committed(journal_seq_t committed)
-{
- LOG_PREFIX(AsyncCleaner::update_journal_tail_committed);
- assert(committed.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
- if (committed == JOURNAL_SEQ_NULL) {
- return;
+ if (journal_alloc_tail == JOURNAL_SEQ_NULL ||
+ alloc_tail > journal_alloc_tail) {
+ if (journal_alloc_tail.segment_seq == alloc_tail.segment_seq) {
+ DEBUG("journal_alloc_tail {} => {}", journal_alloc_tail, alloc_tail);
+ } else {
+ INFO("journal_alloc_tail {} => {}", journal_alloc_tail, alloc_tail);
+ }
+ journal_alloc_tail = alloc_tail;
}
- ceph_assert(journal_head == JOURNAL_SEQ_NULL ||
- journal_head >= committed);
- if (journal_tail_committed == JOURNAL_SEQ_NULL ||
- committed > journal_tail_committed) {
- DEBUG("update journal_tail_committed={} => {}",
- journal_tail_committed, committed);
- journal_tail_committed = committed;
- }
- if (journal_tail_target == JOURNAL_SEQ_NULL ||
- committed > journal_tail_target) {
- DEBUG("update journal_tail_target={} => {}",
- journal_tail_target, committed);
- journal_tail_target = committed;
- }
+ gc_process.maybe_wake_on_space_used();
+ maybe_wake_gc_blocked_io();
}
void AsyncCleaner::close_segment(segment_id_t segment)
AsyncCleaner::gc_cycle_ret AsyncCleaner::do_gc_cycle()
{
if (gc_should_trim_backref()) {
- return gc_trim_backref(get_backref_tail()
+ return gc_trim_backref(get_alloc_tail_target()
).safe_then([](auto) {
return seastar::now();
}).handle_error(
AsyncCleaner::gc_trim_journal_ret AsyncCleaner::gc_trim_journal()
{
- return gc_trim_backref(get_dirty_tail()
+ return gc_trim_backref(get_dirty_tail_target()
).safe_then([this](auto seq) {
return repeat_eagain([this, seq=std::move(seq)]() mutable {
return ecb->with_transaction_intr(
INFO("{} segment managers", sms.size());
init_complete = false;
stats = {};
- journal_tail_target = JOURNAL_SEQ_NULL;
- journal_tail_committed = JOURNAL_SEQ_NULL;
- dirty_extents_replay_from = JOURNAL_SEQ_NULL;
- alloc_info_replay_from = JOURNAL_SEQ_NULL;
+ journal_alloc_tail = JOURNAL_SEQ_NULL;
+ journal_dirty_tail = JOURNAL_SEQ_NULL;
space_tracker.reset(
detailed ?
) mutable -> SegmentManagerGroup::scan_valid_records_ertr::future<>
{
LOG_PREFIX(AsyncCleaner::scan_no_tail_segment);
- if (segment_header.get_type() == segment_type_t::OOL) {
- DEBUG("out-of-line segment {}, decodeing {} records",
- segment_id,
- record_group_header.records);
- } else {
- DEBUG("inline segment {}, decodeing {} records",
- segment_id,
- record_group_header.records);
- auto maybe_record_deltas_list = try_decode_deltas(
- record_group_header, mdbuf, locator.record_block_base);
- if (!maybe_record_deltas_list) {
- ERROR("unable to decode deltas for record {} at {}",
- record_group_header, locator);
- return crimson::ct_error::input_output_error::make();
- }
- for (auto &record_deltas : *maybe_record_deltas_list) {
- for (auto &[ctime, delta] : record_deltas.deltas) {
- if (delta.type == extent_types_t::ALLOC_TAIL) {
- journal_seq_t seq;
- decode(seq, delta.bl);
- update_alloc_info_replay_from(seq);
- }
- }
- }
- }
+ DEBUG("{} {}, decoding {} records",
+ segment_id, segment_header.get_type(), record_group_header.records);
auto maybe_headers = try_decode_record_headers(
record_group_header, mdbuf);
}
for (auto& [_id, segment_info] : segments) {
if (segment_info.is_closed() &&
- !segment_info.is_in_journal(journal_tail_committed)) {
+ !segment_info.is_in_journal(get_journal_tail())) {
double benefit_cost = calc_gc_benefit_cost(_id, now_time, bound_time);
if (benefit_cost > max_benefit_cost) {
id = _id;
"should_block_on_reclaim {}, "
"gc_should_reclaim_space {}, "
"journal_head {}, "
- "journal_tail_target {}, "
- "journal_tail_commit {}, "
- "dirty_tail {}, "
- "dirty_tail_limit {}, "
+ "journal_alloc_tail {}, "
+ "journal_dirty_tail {}, "
+ "alloc_tail_target {}, "
+ "dirty_tail_target {}, "
+ "tail_limit {}, "
"gc_should_trim_journal {}, ",
caller,
segments.get_num_empty(),
should_block_on_reclaim(),
gc_should_reclaim_space(),
journal_head,
- journal_tail_target,
- journal_tail_committed,
- get_dirty_tail(),
- get_dirty_tail_limit(),
+ journal_alloc_tail,
+ journal_dirty_tail,
+ get_alloc_tail_target(),
+ get_dirty_tail_target(),
+ get_tail_limit(),
gc_should_trim_journal()
);
}
public:
virtual void set_journal_head(journal_seq_t) = 0;
- virtual journal_seq_t get_journal_tail_target() const = 0;
+ journal_seq_t get_journal_tail() const {
+ return std::min(get_alloc_tail(), get_dirty_tail());
+ }
+
+ virtual journal_seq_t get_dirty_tail() const = 0;
+
+ virtual journal_seq_t get_alloc_tail() const = 0;
+
+ virtual void update_journal_tails(
+ journal_seq_t dirty_tail, journal_seq_t alloc_tail) = 0;
virtual const segment_info_t& get_seg_info(segment_id_t id) const = 0;
virtual segment_id_t allocate_segment(
segment_seq_t, segment_type_t, data_category_t, reclaim_gen_t) = 0;
- virtual journal_seq_t get_dirty_extents_replay_from() const = 0;
-
- virtual journal_seq_t get_alloc_info_replay_from() const = 0;
-
virtual void close_segment(segment_id_t) = 0;
- virtual void update_journal_tail_committed(journal_seq_t tail_committed) = 0;
-
virtual void update_segment_avail_bytes(segment_type_t, paddr_t) = 0;
virtual void update_modify_time(
seastar::metrics::metric_group metrics;
void register_metrics();
- /// target journal_tail for next fresh segment
- journal_seq_t journal_tail_target;
+ journal_seq_t journal_alloc_tail;
- /// target replay_from for dirty extents
- journal_seq_t dirty_extents_replay_from;
-
- /// target replay_from for alloc infos
- journal_seq_t alloc_info_replay_from;
-
- /// most recently committed journal_tail
- journal_seq_t journal_tail_committed;
+ journal_seq_t journal_dirty_tail;
/// head of journal
journal_seq_t journal_head;
/*
* SegmentProvider interfaces
*/
- journal_seq_t get_journal_tail_target() const final {
- return journal_tail_target;
- }
-
const segment_info_t& get_seg_info(segment_id_t id) const final {
return segments[id];
}
void close_segment(segment_id_t segment) final;
- void update_journal_tail_committed(journal_seq_t committed) final;
-
void update_segment_avail_bytes(segment_type_t type, paddr_t offset) final {
segments.update_written_to(type, offset);
if (type == segment_type_t::JOURNAL) {
return sm_group.get();
}
- journal_seq_t get_dirty_extents_replay_from() const final {
- return dirty_extents_replay_from;
+ journal_seq_t get_dirty_tail() const final {
+ return journal_dirty_tail;
}
- journal_seq_t get_alloc_info_replay_from() const final {
- return alloc_info_replay_from;
+ journal_seq_t get_alloc_tail() const final {
+ return journal_alloc_tail;
}
- void update_journal_tail_target(
- journal_seq_t dirty_replay_from,
- journal_seq_t alloc_replay_from);
-
- void update_alloc_info_replay_from(
- journal_seq_t alloc_replay_from);
+ void update_journal_tails(
+ journal_seq_t dirty_tail, journal_seq_t alloc_tail) final;
void init_mkfs() {
ceph_assert(disable_trim || journal_head != JOURNAL_SEQ_NULL);
- journal_tail_target = journal_head;
- journal_tail_committed = journal_head;
+ journal_alloc_tail = journal_head;
+ journal_dirty_tail = journal_head;
}
using release_ertr = SegmentManagerGroup::release_ertr;
Transaction &t,
journal_seq_t limit);
- journal_seq_t get_dirty_tail() const {
+ journal_seq_t get_dirty_tail_target() const {
auto ret = journal_head;
ceph_assert(ret != JOURNAL_SEQ_NULL);
if (ret.segment_seq >= config.target_journal_segments) {
return ret;
}
- journal_seq_t get_dirty_tail_limit() const {
+ journal_seq_t get_tail_limit() const {
auto ret = journal_head;
ceph_assert(ret != JOURNAL_SEQ_NULL);
if (ret.segment_seq >= config.max_journal_segments) {
return ret;
}
- journal_seq_t get_backref_tail() const {
+ journal_seq_t get_alloc_tail_target() const {
auto ret = journal_head;
ceph_assert(ret != JOURNAL_SEQ_NULL);
if (ret.segment_seq >= config.target_backref_inflight_segments) {
if (!init_complete) {
return 0;
}
- if (journal_tail_committed == JOURNAL_SEQ_NULL) {
+ auto journal_tail = get_journal_tail();
+ if (journal_tail == JOURNAL_SEQ_NULL) {
return segments.get_num_type_journal();
}
assert(journal_head != JOURNAL_SEQ_NULL);
- assert(journal_head.segment_seq >= journal_tail_committed.segment_seq);
- return journal_head.segment_seq + 1 - journal_tail_committed.segment_seq;
+ assert(journal_head.segment_seq >= journal_tail.segment_seq);
+ return journal_head.segment_seq + 1 - journal_tail.segment_seq;
}
std::size_t get_segments_in_journal_closed() const {
auto in_journal = get_segments_in_journal();
*/
std::size_t get_dirty_journal_size() const {
if (journal_head == JOURNAL_SEQ_NULL ||
- dirty_extents_replay_from == JOURNAL_SEQ_NULL) {
+ journal_dirty_tail == JOURNAL_SEQ_NULL) {
return 0;
}
- return (journal_head.segment_seq - dirty_extents_replay_from.segment_seq) *
+ return (journal_head.segment_seq - journal_dirty_tail.segment_seq) *
segments.get_segment_size() +
journal_head.offset.as_seg_paddr().get_segment_off() -
segments.get_segment_size() -
- dirty_extents_replay_from.offset.as_seg_paddr().get_segment_off();
+ journal_dirty_tail.offset.as_seg_paddr().get_segment_off();
}
std::size_t get_alloc_journal_size() const {
if (journal_head == JOURNAL_SEQ_NULL ||
- alloc_info_replay_from == JOURNAL_SEQ_NULL) {
+ journal_alloc_tail == JOURNAL_SEQ_NULL) {
return 0;
}
- return (journal_head.segment_seq - alloc_info_replay_from.segment_seq) *
+ return (journal_head.segment_seq - journal_alloc_tail.segment_seq) *
segments.get_segment_size() +
journal_head.offset.as_seg_paddr().get_segment_off() -
segments.get_segment_size() -
- alloc_info_replay_from.offset.as_seg_paddr().get_segment_off();
+ journal_alloc_tail.offset.as_seg_paddr().get_segment_off();
}
/**
*/
bool should_block_on_trim() const {
if (disable_trim) return false;
- return get_dirty_tail_limit() > journal_tail_target;
+ return get_tail_limit() > get_journal_tail();
}
bool should_block_on_reclaim() const {
* Encapsulates logic for whether gc should be reclaiming segment space.
*/
bool gc_should_trim_journal() const {
- return get_dirty_tail() > journal_tail_target;
+ return get_dirty_tail_target() > journal_dirty_tail;
}
bool gc_should_trim_backref() const {
- return get_backref_tail() > alloc_info_replay_from;
+ return get_alloc_tail_target() > journal_alloc_tail;
}
/**
* gc_should_run
{extent_types_t::OBJECT_DATA_BLOCK, sm::label_instance("ext", "OBJECT_DATA_BLOCK")},
{extent_types_t::RETIRED_PLACEHOLDER, sm::label_instance("ext", "RETIRED_PLACEHOLDER")},
{extent_types_t::ALLOC_INFO, sm::label_instance("ext", "ALLOC_INFO")},
- {extent_types_t::ALLOC_TAIL, sm::label_instance("ext", "ALLOC_TAIL")},
+ {extent_types_t::JOURNAL_TAIL, sm::label_instance("ext", "JOURNAL_TAIL")},
{extent_types_t::TEST_BLOCK, sm::label_instance("ext", "TEST_BLOCK")},
{extent_types_t::TEST_BLOCK_PHYSICAL, sm::label_instance("ext", "TEST_BLOCK_PHYSICAL")},
{extent_types_t::BACKREF_INTERNAL, sm::label_instance("ext", "BACKREF_INTERNAL")},
t.read_set.clear();
t.write_set.clear();
- record_t record;
+ record_t record(trans_src);
auto commit_time = seastar::lowres_system_clock::now();
// Add new copy of mutated blocks, set_io_wait to block until written
}
if (is_cleaner_transaction(trans_src)) {
+ assert(cleaner != nullptr);
+ auto tails = journal_tail_delta_t{
+ get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL),
+ get_oldest_dirty_from().value_or(JOURNAL_SEQ_NULL)
+ };
bufferlist bl;
- encode(get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL), bl);
+ encode(tails, bl);
delta_info_t delta;
- delta.type = extent_types_t::ALLOC_TAIL;
+ delta.type = extent_types_t::JOURNAL_TAIL;
delta.bl = bl;
record.push_back(std::move(delta));
}
journal_seq_t journal_seq,
paddr_t record_base,
const delta_info_t &delta,
- const journal_seq_t &alloc_replay_from,
+ const journal_seq_t &alloc_tail,
sea_time_point &modify_time)
{
LOG_PREFIX(Cache::replay_delta);
- assert(alloc_replay_from != JOURNAL_SEQ_NULL);
+ assert(alloc_tail != JOURNAL_SEQ_NULL);
ceph_assert(modify_time != NULL_TIME);
if (delta.type == extent_types_t::ROOT) {
TRACE("replay root delta at {} {}, remove extent ... -- {}, prv_root={}",
add_extent(root);
return replay_delta_ertr::now();
} else if (delta.type == extent_types_t::ALLOC_INFO) {
- if (journal_seq < alloc_replay_from) {
- DEBUG("journal_seq {} < alloc_replay_from {}, don't replay {}",
- journal_seq, alloc_replay_from, delta);
+ if (journal_seq < alloc_tail) {
+ DEBUG("journal_seq {} < alloc_tail {}, don't replay {}",
+ journal_seq, alloc_tail, delta);
return replay_delta_ertr::now();
}
alloc_delta_t alloc_delta;
if (!backref_list.empty())
backref_batch_update(std::move(backref_list), journal_seq);
return replay_delta_ertr::now();
- } else if (delta.type == extent_types_t::ALLOC_TAIL) {
+ } else if (delta.type == extent_types_t::JOURNAL_TAIL) {
// this delta should have been dealt with during segment cleaner mounting
return replay_delta_ertr::now();
} else {
return do_write(t, extents);
});
}
- record_t record;
+ record_t record(TRANSACTION_TYPE_NULL);
std::list<LogicalCachedExtentRef> pending_extents;
auto commit_time = seastar::lowres_system_clock::now();
).safe_then([this, is_mkfs, FNAME, new_segment_seq](auto sref) {
// initialize new segment
segment_id_t segment_id = sref->get_segment_id();
- journal_seq_t new_journal_tail;
- journal_seq_t new_alloc_replay_from;
+ journal_seq_t dirty_tail;
+ journal_seq_t alloc_tail;
if (type == segment_type_t::JOURNAL) {
- new_journal_tail = segment_provider.get_journal_tail_target();
- new_alloc_replay_from = segment_provider.get_alloc_info_replay_from();
+ dirty_tail = segment_provider.get_dirty_tail();
+ alloc_tail = segment_provider.get_alloc_tail();
if (is_mkfs) {
- ceph_assert(new_journal_tail == JOURNAL_SEQ_NULL);
- ceph_assert(new_alloc_replay_from == JOURNAL_SEQ_NULL);
+ ceph_assert(dirty_tail == JOURNAL_SEQ_NULL);
+ ceph_assert(alloc_tail == JOURNAL_SEQ_NULL);
auto mkfs_seq = journal_seq_t{
new_segment_seq,
paddr_t::make_seg_paddr(segment_id, 0)
};
- new_journal_tail = mkfs_seq;
- new_alloc_replay_from = mkfs_seq;
+ dirty_tail = mkfs_seq;
+ alloc_tail = mkfs_seq;
} else {
- ceph_assert(new_journal_tail != JOURNAL_SEQ_NULL);
- ceph_assert(new_alloc_replay_from != JOURNAL_SEQ_NULL);
+ ceph_assert(dirty_tail != JOURNAL_SEQ_NULL);
+ ceph_assert(alloc_tail != JOURNAL_SEQ_NULL);
}
} else { // OOL
ceph_assert(!is_mkfs);
- new_journal_tail = NO_DELTAS;
- new_alloc_replay_from = NO_DELTAS;
+ dirty_tail = NO_DELTAS;
+ alloc_tail = NO_DELTAS;
}
auto header = segment_header_t{
new_segment_seq,
segment_id,
- new_journal_tail,
- new_alloc_replay_from,
+ dirty_tail,
+ alloc_tail,
current_segment_nonce,
type,
category,
).safe_then([this,
FNAME,
new_journal_seq,
- new_journal_tail,
sref=std::move(sref)]() mutable {
ceph_assert(!current_segment);
current_segment = std::move(sref);
- if (type == segment_type_t::JOURNAL) {
- segment_provider.update_journal_tail_committed(new_journal_tail);
- }
DEBUG("{} rolled new segment id={}",
print_name, current_segment->get_segment_id());
ceph_assert(new_journal_seq.segment_seq ==
}
});
- auto journal_tail = segments.rbegin()->second.journal_tail;
- segment_provider.update_journal_tail_committed(journal_tail);
- auto journal_tail_paddr = journal_tail.offset;
- ceph_assert(journal_tail != JOURNAL_SEQ_NULL);
- ceph_assert(journal_tail_paddr != P_ADDR_NULL);
- auto from = std::find_if(
- segments.begin(),
- segments.end(),
- [&journal_tail_paddr](const auto &seg) -> bool {
- auto& seg_addr = journal_tail_paddr.as_seg_paddr();
- return seg.first == seg_addr.get_segment_id();
- });
- if (from->second.segment_seq != journal_tail.segment_seq) {
- ERROR("journal_tail {} does not match {}",
- journal_tail, from->second);
- ceph_abort();
- }
+ auto last_segment_id = segments.rbegin()->first;
+ auto last_header = segments.rbegin()->second;
+ return scan_last_segment(last_segment_id, last_header
+ ).safe_then([this, FNAME, segments=std::move(segments)] {
+ auto journal_tail = segment_provider.get_journal_tail();
+ auto journal_tail_paddr = journal_tail.offset;
+ ceph_assert(journal_tail != JOURNAL_SEQ_NULL);
+ ceph_assert(journal_tail_paddr != P_ADDR_NULL);
+ auto from = std::find_if(
+ segments.begin(),
+ segments.end(),
+ [&journal_tail_paddr](const auto &seg) -> bool {
+ auto& seg_addr = journal_tail_paddr.as_seg_paddr();
+ return seg.first == seg_addr.get_segment_id();
+ });
+ if (from->second.segment_seq != journal_tail.segment_seq) {
+ ERROR("journal_tail {} does not match {}",
+ journal_tail, from->second);
+ ceph_abort();
+ }
- auto num_segments = segments.end() - from;
- INFO("{} segments to replay from {}",
- num_segments, journal_tail);
- auto ret = replay_segments_t(num_segments);
- std::transform(
- from, segments.end(), ret.begin(),
- [this](const auto &p) {
- auto ret = journal_seq_t{
- p.second.segment_seq,
- paddr_t::make_seg_paddr(
- p.first,
- sm_group.get_block_size())
- };
- return std::make_pair(ret, p.second);
- });
- ret[0].first.offset = journal_tail_paddr;
- return prep_replay_segments_fut(
- prep_replay_segments_ertr::ready_future_marker{},
- std::move(ret));
+ auto num_segments = segments.end() - from;
+ INFO("{} segments to replay from {}",
+ num_segments, journal_tail);
+ auto ret = replay_segments_t(num_segments);
+ std::transform(
+ from, segments.end(), ret.begin(),
+ [this](const auto &p) {
+ auto ret = journal_seq_t{
+ p.second.segment_seq,
+ paddr_t::make_seg_paddr(
+ p.first,
+ sm_group.get_block_size())
+ };
+ return std::make_pair(ret, p.second);
+ });
+ ret[0].first.offset = journal_tail_paddr;
+ return prep_replay_segments_fut(
+ replay_ertr::ready_future_marker{},
+ std::move(ret));
+ });
+}
+
+SegmentedJournal::scan_last_segment_ertr::future<>
+SegmentedJournal::scan_last_segment(
+ const segment_id_t &segment_id,
+ const segment_header_t &segment_header)
+{
+ LOG_PREFIX(SegmentedJournal::scan_last_segment);
+ assert(segment_id == segment_header.physical_segment_id);
+ segment_provider.update_journal_tails(
+ segment_header.dirty_tail, segment_header.alloc_tail);
+ auto seq = journal_seq_t{
+ segment_header.segment_seq,
+ paddr_t::make_seg_paddr(segment_id, 0)
+ };
+ INFO("scanning {} for journal tail deltas", seq);
+ return seastar::do_with(
+ scan_valid_records_cursor(seq),
+ SegmentManagerGroup::found_record_handler_t(
+ [FNAME, this](
+ record_locator_t locator,
+ const record_group_header_t& record_group_header,
+ const bufferlist& mdbuf
+ ) -> SegmentManagerGroup::scan_valid_records_ertr::future<>
+ {
+ DEBUG("decoding {} at {}", record_group_header, locator);
+ bool has_tail_delta = false;
+ auto maybe_headers = try_decode_record_headers(
+ record_group_header, mdbuf);
+ if (!maybe_headers) {
+ // This should be impossible, we did check the crc on the mdbuf
+ ERROR("unable to decode headers from {} at {}",
+ record_group_header, locator);
+ ceph_abort();
+ }
+ for (auto &record_header : *maybe_headers) {
+ ceph_assert(is_valid_transaction(record_header.type));
+ if (is_cleaner_transaction(record_header.type)) {
+ has_tail_delta = true;
+ }
+ }
+ if (has_tail_delta) {
+ bool found_delta = false;
+ auto maybe_record_deltas_list = try_decode_deltas(
+ record_group_header, mdbuf, locator.record_block_base);
+ if (!maybe_record_deltas_list) {
+ ERROR("unable to decode deltas from {} at {}",
+ record_group_header, locator);
+ ceph_abort();
+ }
+ for (auto &record_deltas : *maybe_record_deltas_list) {
+ for (auto &[ctime, delta] : record_deltas.deltas) {
+ if (delta.type == extent_types_t::JOURNAL_TAIL) {
+ found_delta = true;
+ journal_tail_delta_t tail_delta;
+ decode(tail_delta, delta.bl);
+ if (tail_delta.alloc_tail == JOURNAL_SEQ_NULL) {
+ tail_delta.alloc_tail = locator.write_result.start_seq;
+ }
+ if (tail_delta.dirty_tail == JOURNAL_SEQ_NULL) {
+ tail_delta.dirty_tail = locator.write_result.start_seq;
+ }
+ segment_provider.update_journal_tails(
+ tail_delta.dirty_tail, tail_delta.alloc_tail);
+ }
+ }
+ }
+ ceph_assert(found_delta);
+ }
+ return seastar::now();
+ }),
+ [this, nonce=segment_header.segment_nonce](auto &cursor, auto &handler)
+ {
+ return sm_group.scan_valid_records(
+ cursor,
+ nonce,
+ std::numeric_limits<std::size_t>::max(),
+ handler).discard_result();
+ });
}
SegmentedJournal::replay_ertr::future<>
return handler(
locator,
delta,
- segment_provider.get_alloc_info_replay_from(),
+ segment_provider.get_alloc_tail(),
modify_time);
});
});
/// return ordered vector of segments to replay
using replay_segments_t = std::vector<
std::pair<journal_seq_t, segment_header_t>>;
- using prep_replay_segments_ertr = crimson::errorator<
- crimson::ct_error::input_output_error
- >;
- using prep_replay_segments_fut = prep_replay_segments_ertr::future<
+ using prep_replay_segments_fut = replay_ertr::future<
replay_segments_t>;
prep_replay_segments_fut prep_replay_segments(
std::vector<std::pair<segment_id_t, segment_header_t>> segments);
+ /// scan the last segment for tail deltas
+ using scan_last_segment_ertr = replay_ertr;
+ scan_last_segment_ertr::future<> scan_last_segment(
+ const segment_id_t&, const segment_header_t&);
+
/// replays records starting at start through end of segment
replay_ertr::future<>
replay_segment(
return out << "segment_header_t("
<< "segment_seq=" << segment_seq_printer_t{header.segment_seq}
<< ", segment_id=" << header.physical_segment_id
- << ", journal_tail=" << header.journal_tail
+ << ", dirty_tail=" << header.dirty_tail
+ << ", alloc_tail=" << header.alloc_tail
<< ", segment_nonce=" << header.segment_nonce
<< ", type=" << header.type
<< ", category=" << header.category
std::ostream &operator<<(std::ostream& out, const record_t& r)
{
return out << "record_t("
- << "num_extents=" << r.extents.size()
+ << "type=" << r.type
+ << ", num_extents=" << r.extents.size()
<< ", num_deltas=" << r.deltas.size()
<< ", modify_time=" << sea_time_point_printer_t{r.modify_time}
<< ")";
std::ostream &operator<<(std::ostream& out, const record_header_t& r)
{
return out << "record_header_t("
- << "num_extents=" << r.extents
+ << "type=" << r.type
+ << ", num_extents=" << r.extents
<< ", num_deltas=" << r.deltas
<< ", modify_time=" << mod_time_point_printer_t{r.modify_time}
<< ")";
for (auto& r: record_group.records) {
record_header_t rheader{
+ r.type,
(extent_len_t)r.deltas.size(),
(extent_len_t)r.extents.size(),
timepoint_to_mod(r.modify_time)
// the following two types are not extent types,
// they are just used to indicates paddr allocation deltas
ALLOC_INFO = 9,
- ALLOC_TAIL = 10,
+ JOURNAL_TAIL = 10,
// Test Block Types
TEST_BLOCK = 11,
TEST_BLOCK_PHYSICAL = 12,
std::ostream &operator<<(std::ostream &out, const delta_info_t &delta);
+/* contains the latest journal tail information */
+struct journal_tail_delta_t {
+ journal_seq_t alloc_tail;
+ journal_seq_t dirty_tail;
+
+ DENC(journal_tail_delta_t, v, p) {
+ DENC_START(1, 1, p);
+ denc(v.alloc_tail, p);
+ denc(v.dirty_tail, p);
+ DENC_FINISH(p);
+ }
+};
+
class object_data_t {
laddr_t reserved_data_base = L_ADDR_NULL;
extent_len_t reserved_data_len = 0;
* Every segment contains and encode segment_header_t in the first block.
* Our strategy for finding the journal replay point is:
* 1) Find the segment with the highest journal_segment_seq
- * 2) Replay starting at record located at that segment's journal_tail
+ * 2) Get dirty_tail and alloc_tail from the segment header
+ * 3) Scan forward to update tails from journal_tail_delta_t
+ * 4) Replay from the latest tails
*/
struct segment_header_t {
segment_seq_t segment_seq;
segment_id_t physical_segment_id; // debugging
- journal_seq_t journal_tail;
- journal_seq_t alloc_replay_from;
+ journal_seq_t dirty_tail;
+ journal_seq_t alloc_tail;
segment_nonce_t segment_nonce;
segment_type_t type;
DENC_START(1, 1, p);
denc(v.segment_seq, p);
denc(v.physical_segment_id, p);
- denc(v.journal_tail, p);
- denc(v.alloc_replay_from, p);
+ denc(v.dirty_tail, p);
+ denc(v.alloc_tail, p);
denc(v.segment_nonce, p);
denc(v.type, p);
denc(v.category, p);
std::ostream &operator<<(std::ostream&, const record_size_t&);
struct record_t {
+ transaction_type_t type = TRANSACTION_TYPE_NULL;
std::vector<extent_t> extents;
std::vector<delta_info_t> deltas;
record_size_t size;
sea_time_point modify_time = NULL_TIME;
- record_t() = default;
+ record_t(transaction_type_t type) : type{type} { }
+
+ // unit test only
+ record_t() {
+ type = transaction_type_t::MUTATE;
+ }
// unit test only
record_t(std::vector<extent_t>&& _extents,
for (auto& d: _deltas) {
push_back(std::move(d));
}
+ type = transaction_type_t::MUTATE;
}
bool is_empty() const {
std::ostream &operator<<(std::ostream&, const record_t&);
struct record_header_t {
+ transaction_type_t type;
uint32_t deltas; // number of deltas
uint32_t extents; // number of extents
mod_time_point_t modify_time;
DENC(record_header_t, v, p) {
DENC_START(1, 1, p);
+ denc(v.type, p);
denc(v.deltas, p);
denc(v.extents, p);
denc(v.modify_time, p);
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::paddr_t)
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::journal_seq_t)
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::delta_info_t)
+WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::journal_tail_delta_t)
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_header_t)
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_group_header_t)
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::extent_info_t)
[this](
const auto &offsets,
const auto &e,
- const journal_seq_t alloc_replay_from,
+ const journal_seq_t alloc_tail,
auto modify_time)
{
auto start_seq = offsets.write_result.start_seq;
- async_cleaner->update_journal_tail_target(
+ async_cleaner->update_journal_tails(
cache->get_oldest_dirty_from().value_or(start_seq),
cache->get_oldest_backref_dirty_from().value_or(start_seq));
return cache->replay_delta(
start_seq,
offsets.record_block_base,
e,
- alloc_replay_from,
+ alloc_tail,
modify_time);
});
}).safe_then([this] {
lba_manager->complete_transaction(tref, lba_to_clear, lba_to_link);
backref_manager->complete_transaction(tref, backref_to_clear, backref_to_link);
- async_cleaner->update_journal_tail_target(
+ async_cleaner->update_journal_tails(
cache->get_oldest_dirty_from().value_or(start_seq),
cache->get_oldest_backref_dirty_from().value_or(start_seq));
return async_cleaner->maybe_release_segment(tref);
*/
void set_journal_head(journal_seq_t) final {}
- journal_seq_t get_journal_tail_target() const final { return dummy_tail; }
+ journal_seq_t get_dirty_tail() const final { return dummy_tail; }
+
+ journal_seq_t get_alloc_tail() const final { return dummy_tail; }
+
+ void update_journal_tails(journal_seq_t, journal_seq_t) final {}
const segment_info_t& get_seg_info(segment_id_t id) const final {
tmp_info = {};
void close_segment(segment_id_t) final {}
- void update_journal_tail_committed(journal_seq_t committed) final {}
-
void update_segment_avail_bytes(segment_type_t, paddr_t) final {}
void update_modify_time(segment_id_t, sea_time_point, std::size_t) final {}
SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); }
- journal_seq_t get_dirty_extents_replay_from() const final {
- return dummy_tail;
- }
-
- journal_seq_t get_alloc_info_replay_from() const final {
- return dummy_tail;
- }
-
virtual void complete_commit(Transaction &t) {}
seastar::future<> submit_transaction(TransactionRef t)
{
*/
void set_journal_head(journal_seq_t) final {}
- journal_seq_t get_journal_tail_target() const final { return dummy_tail; }
+ journal_seq_t get_dirty_tail() const final { return dummy_tail; }
+
+ journal_seq_t get_alloc_tail() const final { return dummy_tail; }
+
+ void update_journal_tails(journal_seq_t, journal_seq_t) final {}
const segment_info_t& get_seg_info(segment_id_t id) const final {
tmp_info = {};
return tmp_info;
}
- journal_seq_t get_dirty_extents_replay_from() const final {
- return dummy_tail;
- }
-
- journal_seq_t get_alloc_info_replay_from() const final {
- return dummy_tail;
- }
-
segment_id_t allocate_segment(
segment_seq_t seq,
segment_type_t type,
void close_segment(segment_id_t) final {}
- void update_journal_tail_committed(journal_seq_t paddr) final {}
-
void update_segment_avail_bytes(segment_type_t, paddr_t) final {}
void update_modify_time(segment_id_t, sea_time_point, std::size_t) final {}