From: Xuehan Xu Date: Fri, 4 Mar 2022 08:56:33 +0000 (+0800) Subject: crimson/os/seastore: track extents' last modification time X-Git-Tag: v18.0.0~1268^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7d013677fa6b66a0882d31f3708f5734e94b787e;p=ceph.git crimson/os/seastore: track extents' last modification time Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index be848c60ce17..ab73db01f389 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -945,6 +945,11 @@ record_t Cache::prepare_record(Transaction &t) t.write_set.clear(); record_t record; + auto commit_time = seastar::lowres_system_clock::now(); + record.commit_time = commit_time.time_since_epoch().count(); + record.commit_type = (t.get_src() == Transaction::src_t::MUTATE) + ? record_commit_type_t::MODIFY + : record_commit_type_t::REWRITE; // Add new copy of mutated blocks, set_io_wait to block until written record.deltas.reserve(t.mutated_block_list.size()); @@ -967,6 +972,7 @@ record_t Cache::prepare_record(Transaction &t) i->prepare_write(); i->set_io_wait(); + i->set_last_modified(commit_time); assert(i->get_version() > 0); auto final_crc = i->get_crc32c(); if (i->get_type() == extent_types_t::ROOT) { @@ -1053,6 +1059,13 @@ record_t Cache::prepare_record(Transaction &t) ceph_assert(0 == "ROOT never gets written as a fresh block"); } + if (t.get_src() == Transaction::src_t::MUTATE) { + i->set_last_modified(commit_time); + } else { + assert(t.get_src() >= Transaction::src_t::CLEANER_TRIM); + i->set_last_rewritten(commit_time); + } + assert(bl.length() == i->get_length()); record.push_back(extent_t{ i->get_type(), @@ -1061,7 +1074,8 @@ record_t Cache::prepare_record(Transaction &t) : (is_lba_node(i->get_type()) ? i->cast()->get_node_meta().begin : L_ADDR_NULL), - std::move(bl) + std::move(bl), + i->get_last_modified().time_since_epoch().count() }); } @@ -1173,7 +1187,13 @@ void Cache::complete_commit( if (cleaner) { cleaner->mark_space_used( i->get_paddr(), - i->get_length()); + i->get_length(), + (t.get_src() == Transaction::src_t::MUTATE) + ? i->last_modified + : seastar::lowres_system_clock::time_point(), + (t.get_src() >= Transaction::src_t::CLEANER_TRIM) + ? i->last_rewritten + : seastar::lowres_system_clock::time_point()); } } }); @@ -1274,7 +1294,8 @@ Cache::replay_delta_ret Cache::replay_delta( journal_seq_t journal_seq, paddr_t record_base, - const delta_info_t &delta) + const delta_info_t &delta, + seastar::lowres_system_clock::time_point& last_modified) { LOG_PREFIX(Cache::replay_delta); if (delta.type == extent_types_t::ROOT) { @@ -1286,6 +1307,7 @@ Cache::replay_delta( root->state = CachedExtent::extent_state_t::DIRTY; DEBUG("replayed root delta at {} {}, add extent -- {}, root={}", journal_seq, record_base, delta, *root); + root->set_last_modified(last_modified); add_extent(root); return replay_delta_ertr::now(); } else { @@ -1336,6 +1358,7 @@ Cache::replay_delta( assert(extent->last_committed_crc == delta.prev_crc); extent->apply_delta_and_adjust_crc(record_base, delta.bl); + extent->set_last_modified(last_modified); assert(extent->last_committed_crc == delta.final_crc); extent->version++; diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 3379d8e64849..57803a27085a 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -596,7 +596,8 @@ public: replay_delta_ret replay_delta( journal_seq_t seq, paddr_t record_block_base, - const delta_info_t &delta); + const delta_info_t &delta, + seastar::lowres_system_clock::time_point& last_modified); /** * init_cached_extents diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 19dd03f13df3..f1063c1a02c1 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -100,7 +100,36 @@ class CachedExtent : public boost::intrusive_ref_counter< // Points at current version while in state MUTATION_PENDING CachedExtentRef prior_instance; + // time of the last modification + seastar::lowres_system_clock::time_point last_modified; + + // time of the last rewrite + seastar::lowres_system_clock::time_point last_rewritten; public: + + void set_last_modified(seastar::lowres_system_clock::duration d) { + last_modified = seastar::lowres_system_clock::time_point(d); + } + + void set_last_modified(seastar::lowres_system_clock::time_point t) { + last_modified = t; + } + + seastar::lowres_system_clock::time_point get_last_modified() const { + return last_modified; + } + + void set_last_rewritten(seastar::lowres_system_clock::duration d) { + last_rewritten = seastar::lowres_system_clock::time_point(d); + } + + void set_last_rewritten(seastar::lowres_system_clock::time_point t) { + last_rewritten = t; + } + + seastar::lowres_system_clock::time_point get_last_rewritten() const { + return last_rewritten; + } /** * duplicate_for_write * @@ -170,6 +199,8 @@ public: << ", type=" << get_type() << ", version=" << version << ", dirty_from_or_retired_at=" << dirty_from_or_retired_at + << ", last_modified=" << last_modified.time_since_epoch() + << ", last_rewritten=" << last_rewritten.time_since_epoch() << ", paddr=" << get_paddr() << ", length=" << get_length() << ", state=" << state diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index 7dc44e78b013..44f0172ad8c8 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -80,7 +80,11 @@ SegmentedAllocator::Writer::do_write( } assert(segment_allocator.can_write()); - ool_record_t record(segment_allocator.get_block_size()); + ool_record_t record( + segment_allocator.get_block_size(), + (t.get_src() == Transaction::src_t::MUTATE) + ? record_commit_type_t::MODIFY + : record_commit_type_t::REWRITE); for (auto it = extents.begin(); it != extents.end();) { auto& extent = *it; auto wouldbe_length = record.get_wouldbe_encoded_record_length(extent); diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 7034618ade91..bd464abc3454 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -28,6 +28,7 @@ class ool_record_t { public: OolExtent(LogicalCachedExtentRef& lextent) : lextent(lextent) {} + void set_ool_paddr(paddr_t addr) { ool_offset = addr; } @@ -46,7 +47,11 @@ class ool_record_t { }; public: - ool_record_t(size_t block_size) : block_size(block_size) {} + ool_record_t( + size_t block_size, + record_commit_type_t commit_type) + : block_size(block_size), + commit_type(commit_type) {} record_group_size_t get_encoded_record_length() { assert(extents.size() == record.extents.size()); return record_group_size_t(record.size, block_size); @@ -60,11 +65,20 @@ public: segment_nonce_t nonce) { assert(extents.size() == record.extents.size()); assert(!record.deltas.size()); + auto commit_time = seastar::lowres_system_clock::now(); + record.commit_time = commit_time.time_since_epoch().count(); + record.commit_type = commit_type; auto record_group = record_group_t(std::move(record), block_size); seastore_off_t extent_offset = base + record_group.size.get_mdlength(); for (auto& extent : extents) { extent.set_ool_paddr( paddr_t::make_seg_paddr(segment, extent_offset)); + if (commit_type == record_commit_type_t::MODIFY) { + extent.get_lextent()->set_last_modified(commit_time); + } else { + assert(commit_type == record_commit_type_t::REWRITE); + extent.get_lextent()->set_last_rewritten(commit_time); + } extent_offset += extent.get_bptr().length(); } assert(extent_offset == @@ -78,7 +92,8 @@ public: record.push_back(extent_t{ extent->get_type(), extent->get_laddr(), - std::move(bl)}); + std::move(bl), + extent->get_last_modified().time_since_epoch().count()}); } std::vector& get_extents() { return extents; @@ -103,6 +118,8 @@ private: record_t record; size_t block_size; seastore_off_t base = MAX_SEG_OFF; + record_commit_type_t commit_type = + record_commit_type_t::NONE; }; /** diff --git a/src/crimson/os/seastore/extent_reader.cc b/src/crimson/os/seastore/extent_reader.cc index cbb62933e3b1..a2f3c10ab3a7 100644 --- a/src/crimson/os/seastore/extent_reader.cc +++ b/src/crimson/os/seastore/extent_reader.cc @@ -9,6 +9,48 @@ SET_SUBSYS(seastore_journal); namespace crimson::os::seastore { +ExtentReader::read_segment_tail_ret +ExtentReader::read_segment_tail(segment_id_t segment) +{ + auto& segment_manager = *segment_managers[segment.device_id()]; + return segment_manager.read( + paddr_t::make_seg_paddr( + segment, + segment_manager.get_segment_size() - + segment_manager.get_rounded_tail_length()), + segment_manager.get_rounded_tail_length() + ).handle_error( + read_segment_header_ertr::pass_further{}, + crimson::ct_error::assert_all{ + "Invalid error in ExtentReader::read_segment_tail" + } + ).safe_then([=, &segment_manager](bufferptr bptr) -> read_segment_tail_ret { + LOG_PREFIX(ExtentReader::read_segment_tail); + DEBUG("segment {} bptr size {}", segment, bptr.length()); + + segment_tail_t tail; + bufferlist bl; + bl.push_back(bptr); + + DEBUG("segment {} block crc {}", + segment, + bl.begin().crc32c(segment_manager.get_block_size(), 0)); + + auto bp = bl.cbegin(); + try { + decode(tail, bp); + } catch (ceph::buffer::error &e) { + DEBUG("segment {} unable to decode tail, skipping -- {}", + segment, e); + return crimson::ct_error::enodata::make(); + } + DEBUG("segment {} tail {}", segment, tail); + return read_segment_tail_ret( + read_segment_tail_ertr::ready_future_marker{}, + tail); + }); +} + ExtentReader::read_segment_header_ret ExtentReader::read_segment_header(segment_id_t segment) { @@ -82,7 +124,12 @@ ExtentReader::scan_extents_ret ExtentReader::scan_extents( for (auto& r: *maybe_record_extent_infos) { DEBUG("decoded {} extents", r.extent_infos.size()); for (const auto &i : r.extent_infos) { - extents->emplace_back(extent_offset, i); + extents->emplace_back( + extent_offset, + std::pair( + {r.header.commit_time, + r.header.commit_type}, + i)); auto& seg_addr = extent_offset.as_seg_paddr(); seg_addr.set_segment_off( seg_addr.get_segment_off() + i.len); diff --git a/src/crimson/os/seastore/extent_reader.h b/src/crimson/os/seastore/extent_reader.h index 697adb47aa85..ffd1ad4f42cd 100644 --- a/src/crimson/os/seastore/extent_reader.h +++ b/src/crimson/os/seastore/extent_reader.h @@ -34,6 +34,16 @@ public: segment_header_t>; read_segment_header_ret read_segment_header(segment_id_t segment); + using read_segment_tail_ertr = read_segment_header_ertr; + using read_segment_tail_ret = read_segment_tail_ertr::future< + segment_tail_t>; + read_segment_tail_ret read_segment_tail(segment_id_t segment); + + struct commit_info_t { + mod_time_point_t commit_time; + record_commit_type_t commit_type; + }; + /** * scan_extents * @@ -45,7 +55,8 @@ public: */ using scan_extents_cursor = scan_valid_records_cursor; using scan_extents_ertr = read_ertr::extend; - using scan_extents_ret_bare = std::list>; + using scan_extents_ret_bare = + std::list>>; using scan_extents_ret = scan_extents_ertr::future; scan_extents_ret scan_extents( scan_extents_cursor &cursor, diff --git a/src/crimson/os/seastore/journal.h b/src/crimson/os/seastore/journal.h index 904d794d4a23..8cbbb1f07042 100644 --- a/src/crimson/os/seastore/journal.h +++ b/src/crimson/os/seastore/journal.h @@ -79,7 +79,8 @@ public: using replay_ret = replay_ertr::future<>; using delta_handler_t = std::function< replay_ret(const record_locator_t&, - const delta_info_t&)>; + const delta_info_t&, + seastar::lowres_system_clock::time_point last_modified)>; virtual replay_ret replay( delta_handler_t &&delta_handler) = 0; diff --git a/src/crimson/os/seastore/journal/segment_allocator.cc b/src/crimson/os/seastore/journal/segment_allocator.cc index 7b04f6f0cf0a..938c2b2f843b 100644 --- a/src/crimson/os/seastore/journal/segment_allocator.cc +++ b/src/crimson/os/seastore/journal/segment_allocator.cc @@ -224,7 +224,11 @@ SegmentAllocator::close_segment(bool is_rolling) cur_segment_seq, close_segment_id, cur_journal_tail, - current_segment_nonce}; + current_segment_nonce, + segment_provider.get_last_modified( + close_segment_id).time_since_epoch().count(), + segment_provider.get_last_rewritten( + close_segment_id).time_since_epoch().count()}; ceph::bufferlist bl; encode(tail, bl); diff --git a/src/crimson/os/seastore/journal/segmented_journal.cc b/src/crimson/os/seastore/journal/segmented_journal.cc index ce677b55c9d8..fad6f0ee1dac 100644 --- a/src/crimson/os/seastore/journal/segmented_journal.cc +++ b/src/crimson/os/seastore/journal/segmented_journal.cc @@ -191,8 +191,10 @@ SegmentedJournal::replay_segment( [locator, this, FNAME, - &handler](delta_info_t& delta) + &handler](auto &p) { + auto& commit_time = p.first; + auto& delta = p.second; /* The journal may validly contain deltas for extents in * since released segments. We can detect those cases by * checking whether the segment in question currently has a @@ -216,7 +218,11 @@ SegmentedJournal::replay_segment( return replay_ertr::now(); } } - return handler(locator, delta); + return handler( + locator, + delta, + seastar::lowres_system_clock::time_point( + seastar::lowres_system_clock::duration(commit_time))); }); }); }); diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc index 23826ff989fe..f1f1eee373c7 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc @@ -421,6 +421,7 @@ LBABtree::rewrite_lba_extent_ret LBABtree::rewrite_lba_extent( lba_extent.get_length(), nlba_extent->get_bptr().c_str()); nlba_extent->pin.set_range(nlba_extent->get_node_meta()); + nlba_extent->set_last_modified(lba_extent.get_last_modified()); /* This is a bit underhanded. Any relative addrs here must necessarily * be record relative as we are rewriting a dirty extent. Thus, we diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index f4b8e4cbb820..0b02091b598e 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -233,6 +233,18 @@ std::ostream &operator<<(std::ostream &out, const segment_header_t &header) << ")"; } +std::ostream &operator<<(std::ostream &out, const segment_tail_t &tail) +{ + return out << "segment_tail_t(" + << "segment_seq=" << tail.journal_segment_seq + << ", segment_id=" << tail.physical_segment_id + << ", journal_tail=" << tail.journal_tail + << ", segment_nonce=" << tail.segment_nonce + << ", last_modified=" << tail.last_modified + << ", last_rewritten=" << tail.last_rewritten + << ")"; +} + extent_len_t record_size_t::get_raw_mdlength() const { // empty record is allowed to submit @@ -365,6 +377,8 @@ ceph::bufferlist encode_records( record_header_t rheader{ (extent_len_t)r.deltas.size(), (extent_len_t)r.extents.size(), + r.commit_time, + r.commit_type }; encode(rheader, bl); } @@ -467,8 +481,6 @@ bool validate_records_data( return success; } -namespace { - std::optional> try_decode_record_headers( const record_group_header_t& header, @@ -492,8 +504,6 @@ try_decode_record_headers( return record_headers; } -} - std::optional > try_decode_extent_infos( const record_group_header_t& header, @@ -561,7 +571,8 @@ try_decode_deltas( result_iter->deltas.resize(r.header.deltas); for (auto& i: result_iter->deltas) { try { - decode(i, bliter); + decode(i.second, bliter); + i.first = r.header.commit_time; } catch (ceph::buffer::error &e) { journal_logger().debug( "try_decode_deltas: failed, " diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index b345a0c50956..e6ddbb221507 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -10,6 +10,8 @@ #include #include +#include + #include "include/byteorder.h" #include "include/denc.h" #include "include/buffer.h" @@ -882,11 +884,23 @@ constexpr bool is_lba_node(extent_types_t type) type == extent_types_t::LADDR_LEAF; } +std::ostream &operator<<(std::ostream &out, extent_types_t t); + +enum class record_commit_type_t : uint8_t { + NONE, + MODIFY, + REWRITE +}; + +// type for extent modification time, milliseconds since the epoch +using mod_time_point_t = int64_t; + /* description of a new physical extent */ struct extent_t { extent_types_t type; ///< type of extent laddr_t addr; ///< laddr of extent (L_ADDR_NULL for non-logical) ceph::bufferlist bl; ///< payload, bl.length() == length, aligned + mod_time_point_t last_modified; }; using extent_version_t = uint32_t; @@ -1244,16 +1258,21 @@ struct extent_info_t { extent_types_t type = extent_types_t::NONE; laddr_t addr = L_ADDR_NULL; extent_len_t len = 0; + mod_time_point_t last_modified; extent_info_t() = default; extent_info_t(const extent_t &et) - : type(et.type), addr(et.addr), len(et.bl.length()) {} + : type(et.type), addr(et.addr), + len(et.bl.length()), + last_modified(et.last_modified) + {} DENC(extent_info_t, v, p) { DENC_START(1, 1, p); denc(v.type, p); denc(v.addr, p); denc(v.len, p); + denc(v.last_modified, p); DENC_FINISH(p); } }; @@ -1291,7 +1310,27 @@ struct segment_header_t { }; std::ostream &operator<<(std::ostream &out, const segment_header_t &header); -using segment_tail_t = segment_header_t; +struct segment_tail_t { + segment_seq_t journal_segment_seq; + segment_id_t physical_segment_id; // debugging + + journal_seq_t journal_tail; + segment_nonce_t segment_nonce; + mod_time_point_t last_modified; + mod_time_point_t last_rewritten; + + DENC(segment_tail_t, v, p) { + DENC_START(1, 1, p); + denc(v.journal_segment_seq, p); + denc(v.physical_segment_id, p); + denc(v.journal_tail, p); + denc(v.segment_nonce, p); + denc(v.last_modified, p); + denc(v.last_rewritten, p); + DENC_FINISH(p); + } +}; +std::ostream &operator<<(std::ostream &out, const segment_tail_t &tail); struct record_size_t { extent_len_t plain_mdlength = 0; // mdlength without the record header @@ -1319,6 +1358,8 @@ struct record_t { std::vector extents; std::vector deltas; record_size_t size; + mod_time_point_t commit_time; + record_commit_type_t commit_type; record_t() = default; record_t(std::vector&& _extents, @@ -1361,12 +1402,15 @@ std::ostream &operator<<(std::ostream&, const record_t&); struct record_header_t { uint32_t deltas; // number of deltas uint32_t extents; // number of extents - + mod_time_point_t commit_time = 0; + record_commit_type_t commit_type; DENC(record_header_t, v, p) { DENC_START(1, 1, p); denc(v.deltas, p); denc(v.extents, p); + denc(v.commit_time, p); + denc(v.commit_type, p); DENC_FINISH(p); } }; @@ -1504,10 +1548,15 @@ std::optional > try_decode_extent_infos( const record_group_header_t& header, const ceph::bufferlist& md_bl); +std::optional> +try_decode_record_headers( + const record_group_header_t& header, + const ceph::bufferlist& md_bl); struct record_deltas_t { paddr_t record_block_base; - std::vector deltas; + // the mod time here can only be modification time, not rewritten time + std::vector> deltas; }; std::optional > try_decode_deltas( @@ -1660,6 +1709,7 @@ WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_group_header_t) WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::extent_info_t) WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_header_t) WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::rbm_alloc_delta_t) +WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_tail_t) template<> struct denc_traits { diff --git a/src/crimson/os/seastore/segment_cleaner.cc b/src/crimson/os/seastore/segment_cleaner.cc index 54eabedd5fc1..3fbcaa4571b2 100644 --- a/src/crimson/os/seastore/segment_cleaner.cc +++ b/src/crimson/os/seastore/segment_cleaner.cc @@ -402,7 +402,10 @@ SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space() return trans_intr::do_for_each( extents, [this, &t](auto &extent) { - auto &[addr, info] = extent; + auto &addr = extent.first; + auto commit_time = extent.second.first.commit_time; + auto commit_type = extent.second.first.commit_type; + auto &info = extent.second.second; logger().debug( "SegmentCleaner::gc_reclaim_space: checking extent {}", info); @@ -412,7 +415,8 @@ SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space() addr, info.addr, info.len - ).si_then([addr=addr, &t, this](CachedExtentRef ext) { + ).si_then([&info, commit_type, commit_time, addr=addr, &t, this] + (CachedExtentRef ext) { if (!ext) { logger().debug( "SegmentCleaner::gc_reclaim_space: addr {} dead, skipping", @@ -423,6 +427,34 @@ SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space() "SegmentCleaner::gc_reclaim_space: addr {} alive, gc'ing {}", addr, *ext); + assert(commit_time); + assert(info.last_modified); + assert(commit_type == record_commit_type_t::MODIFY + || commit_type == record_commit_type_t::REWRITE); + if (ext->get_last_modified() == + seastar::lowres_system_clock::time_point()) { + assert(ext->get_last_rewritten() == + seastar::lowres_system_clock::time_point()); + ext->set_last_modified( + seastar::lowres_system_clock::duration( + info.last_modified)); + } + if (commit_type == record_commit_type_t::REWRITE + && ext->get_last_rewritten() == + seastar::lowres_system_clock::time_point()) { + ext->set_last_rewritten( + seastar::lowres_system_clock::duration( + commit_time)); + } + + assert( + (commit_type == record_commit_type_t::MODIFY + && commit_time <= + ext->get_last_modified().time_since_epoch().count()) + || (commit_type == record_commit_type_t::REWRITE + && commit_time == + ext->get_last_rewritten().time_since_epoch().count())); + return ecb->rewrite_extent( t, ext); @@ -479,37 +511,147 @@ SegmentCleaner::mount_ret SegmentCleaner::mount( register_metrics(); logger().debug("SegmentCleaner::mount: {} segments", segments.size()); - return crimson::do_for_each( - segments.begin(), - segments.end(), - [this](auto& it) { - auto segment_id = it.first; - return scanner->read_segment_header( - segment_id - ).safe_then([segment_id, this](auto header) { - logger().debug( - "ExtentReader::mount: segment_id={} -- {}", - segment_id, header); - auto s_type = header.get_type(); - if (s_type == segment_type_t::NULL_SEG) { - logger().error( - "ExtentReader::mount: got null segment, segment_id={} -- {}", + return seastar::do_with( + std::vector>(), + [this](auto& segment_set) { + return crimson::do_for_each( + segments.begin(), + segments.end(), + [this, &segment_set](auto& it) { + auto segment_id = it.first; + return scanner->read_segment_header( + segment_id + ).safe_then([segment_id, this, &segment_set](auto header) { + logger().debug( + "ExtentReader::mount: segment_id={} -- {}", segment_id, header); - ceph_abort(); - } - init_mark_segment_closed( - segment_id, - header.journal_segment_seq); - }).handle_error( - crimson::ct_error::enoent::handle([](auto) { - return mount_ertr::now(); - }), - crimson::ct_error::enodata::handle([](auto) { - return mount_ertr::now(); + auto s_type = header.get_type(); + if (s_type == segment_type_t::NULL_SEG) { + logger().error( + "ExtentReader::mount: got null segment, segment_id={} -- {}", + segment_id, header); + ceph_abort(); + } + return scanner->read_segment_tail( + segment_id + ).safe_then([this, segment_id, &segment_set, header](auto tail) + -> scan_extents_ertr::future<> { + if (tail.segment_nonce != header.segment_nonce) { + return scan_nonfull_segment(header, segment_set, segment_id); + } + seastar::lowres_system_clock::time_point last_modified( + seastar::lowres_system_clock::duration(tail.last_modified)); + seastar::lowres_system_clock::time_point last_rewritten( + seastar::lowres_system_clock::duration(tail.last_rewritten)); + if (segments[segment_id].last_modified < last_modified) { + segments[segment_id].last_modified = last_modified; + } + if (segments[segment_id].last_rewritten < last_rewritten) { + segments[segment_id].last_rewritten = last_rewritten; + } + init_mark_segment_closed( + segment_id, + header.journal_segment_seq); + return seastar::now(); + }).handle_error( + crimson::ct_error::enodata::handle( + [this, header, segment_id, &segment_set](auto) { + return scan_nonfull_segment(header, segment_set, segment_id); + }), + crimson::ct_error::pass_further_all{} + ); + }).handle_error( + crimson::ct_error::enoent::handle([](auto) { + return mount_ertr::now(); + }), + crimson::ct_error::enodata::handle([](auto) { + return mount_ertr::now(); + }), + crimson::ct_error::input_output_error::pass_further{}, + crimson::ct_error::assert_all{"unexpected error"} + ); + }); + }); +} + +SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment( + const segment_header_t& header, + scan_extents_ret_bare& segment_set, + segment_id_t segment_id) +{ + if (header.get_type() == segment_type_t::OOL) { + logger().info( + "ExtentReader::init_segments: out-of-line segment {}", + segment_id); + return seastar::do_with( + scan_valid_records_cursor({ + segments[segment_id].journal_segment_seq, + paddr_t::make_seg_paddr(segment_id, 0)}), + [this, segment_id, header](auto& cursor) { + return seastar::do_with( + ExtentReader::found_record_handler_t([this, segment_id]( + record_locator_t locator, + const record_group_header_t& header, + const bufferlist& mdbuf + ) mutable -> ExtentReader::scan_valid_records_ertr::future<> { + LOG_PREFIX(SegmentCleaner::scan_nonfull_segment); + DEBUG("decodeing {} records", header.records); + auto maybe_headers = try_decode_record_headers(header, mdbuf); + if (!maybe_headers) { + ERROR("unable to decode record headers for record group {}", + locator.record_block_base); + return crimson::ct_error::input_output_error::make(); + } + + for (auto& header : *maybe_headers) { + mod_time_point_t ctime = header.commit_time; + auto commit_type = header.commit_type; + if (!ctime) { + ERROR("Scanner::init_segments: extent {} 0 commit_time", + ctime); + ceph_abort("0 commit_time"); + } + seastar::lowres_system_clock::time_point commit_time{ + seastar::lowres_system_clock::duration(ctime)}; + assert(commit_type == record_commit_type_t::MODIFY + || commit_type == record_commit_type_t::REWRITE); + if (commit_type == record_commit_type_t::MODIFY + && this->segments[segment_id].last_modified < commit_time) { + this->segments[segment_id].last_modified = commit_time; + } + if (commit_type == record_commit_type_t::REWRITE + && this->segments[segment_id].last_rewritten < commit_time) { + this->segments[segment_id].last_rewritten = commit_time; + } + } + return seastar::now(); }), - crimson::ct_error::input_output_error::pass_further{} + [&cursor, header, segment_id, this](auto& handler) { + return scanner->scan_valid_records( + cursor, + header.segment_nonce, + segments[segment_id.device_id()]->segment_size, + handler); + } ); + }).safe_then([this, segment_id, header](auto) { + init_mark_segment_closed( + segment_id, + header.journal_segment_seq); + return seastar::now(); }); + } else if (header.get_type() == segment_type_t::JOURNAL) { + logger().info( + "ExtentReader::init_segments: journal segment {}", + segment_id); + segment_set.emplace_back(std::make_pair(segment_id, std::move(header))); + } else { + ceph_abort("unexpected segment type"); + } + init_mark_segment_closed( + segment_id, + header.journal_segment_seq); + return seastar::now(); } } diff --git a/src/crimson/os/seastore/segment_cleaner.h b/src/crimson/os/seastore/segment_cleaner.h index 771cbbbcd63b..4f0596f9edcb 100644 --- a/src/crimson/os/seastore/segment_cleaner.h +++ b/src/crimson/os/seastore/segment_cleaner.h @@ -58,6 +58,9 @@ class segment_info_set_t { // Will be non-null for any segments in the current journal segment_seq_t journal_segment_seq = NULL_SEG_SEQ; + seastar::lowres_system_clock::time_point last_modified; + seastar::lowres_system_clock::time_point last_rewritten; + segment_type_t get_type() const { return segment_seq_to_type(journal_segment_seq); } @@ -294,6 +297,12 @@ public: virtual segment_seq_t get_seq(segment_id_t id) { return 0; } + virtual seastar::lowres_system_clock::time_point get_last_modified( + segment_id_t id) const = 0; + + virtual seastar::lowres_system_clock::time_point get_last_rewritten( + segment_id_t id) const = 0; + virtual void update_segment_avail_bytes(paddr_t offset) = 0; virtual ~SegmentProvider() {} @@ -763,6 +772,10 @@ public: void mark_space_used( paddr_t addr, extent_len_t len, + seastar::lowres_system_clock::time_point last_modified + = seastar::lowres_system_clock::time_point(), + seastar::lowres_system_clock::time_point last_rewritten + = seastar::lowres_system_clock::time_point(), bool init_scan = false) { auto& seg_addr = addr.as_seg_paddr(); assert(seg_addr.get_segment_id().device_id() == @@ -782,10 +795,30 @@ public: auto new_usage = space_tracker->calc_utilization(seg_addr.get_segment_id()); adjust_segment_util(old_usage, new_usage); + // use the last extent's last modified time for the calculation of the projected + // time the segments' live extents are to stay unmodified; this is an approximation + // of the sprite lfs' segment "age". + + if (last_modified > segments[seg_addr.get_segment_id()].last_modified) + segments[seg_addr.get_segment_id()].last_modified = last_modified; + + if (last_rewritten > segments[seg_addr.get_segment_id()].last_rewritten) + segments[seg_addr.get_segment_id()].last_rewritten = last_rewritten; + gc_process.maybe_wake_on_space_used(); assert(ret > 0); } + seastar::lowres_system_clock::time_point get_last_modified( + segment_id_t id) const final { + return segments[id].last_modified; + } + + seastar::lowres_system_clock::time_point get_last_rewritten( + segment_id_t id) const final { + return segments[id].last_rewritten; + } + void mark_space_free( paddr_t addr, extent_len_t len) { @@ -1224,6 +1257,15 @@ private: } } + using scan_extents_ret_bare = + std::vector>; + using scan_extents_ertr = ExtentReader::scan_extents_ertr; + using scan_extents_ret = scan_extents_ertr::future<>; + scan_extents_ret scan_nonfull_segment( + const segment_header_t& header, + scan_extents_ret_bare& segment_set, + segment_id_t segment_id); + /** * gc_should_reclaim_space * diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 00c2863a92f4..e38c1ee9e057 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -90,14 +90,15 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount() scanner.get_segment_managers() ).safe_then([this] { return journal->replay( - [this](const auto &offsets, const auto &e) { + [this](const auto &offsets, const auto &e, auto last_modified) { auto start_seq = offsets.write_result.start_seq; segment_cleaner->update_journal_tail_target( cache->get_oldest_dirty_from().value_or(start_seq)); return cache->replay_delta( start_seq, offsets.record_block_base, - e); + e, + last_modified); }); }).safe_then([this] { return journal->open_for_write(); @@ -127,6 +128,8 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount() segment_cleaner->mark_space_used( addr, len , + seastar::lowres_system_clock::time_point(), + seastar::lowres_system_clock::time_point(), /* init_scan = */ true); } }); @@ -409,6 +412,7 @@ TransactionManager::rewrite_logical_extent( nlextent->get_bptr().c_str()); nlextent->set_laddr(lextent->get_laddr()); nlextent->set_pin(lextent->get_pin().duplicate()); + nlextent->last_modified = lextent->last_modified; DEBUGT("rewriting extent -- {} to {}", t, *lextent, *nlextent); diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index a92e2926656f..20342c29c788 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -41,6 +41,15 @@ struct btree_test_base : btree_test_base() = default; + seastar::lowres_system_clock::time_point get_last_modified( + segment_id_t id) const final { + return seastar::lowres_system_clock::time_point(); + } + + seastar::lowres_system_clock::time_point get_last_rewritten( + segment_id_t id) const final { + return seastar::lowres_system_clock::time_point(); + } void update_segment_avail_bytes(paddr_t offset) final {} segment_id_t get_segment(device_id_t id, segment_seq_t seq) final { diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc index 65ff64c5216a..79eb1931851f 100644 --- a/src/test/crimson/seastore/test_seastore_journal.cc +++ b/src/test/crimson/seastore/test_seastore_journal.cc @@ -82,6 +82,16 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider { journal_test_t() = default; + seastar::lowres_system_clock::time_point get_last_modified( + segment_id_t id) const final { + return seastar::lowres_system_clock::time_point(); + } + + seastar::lowres_system_clock::time_point get_last_rewritten( + segment_id_t id) const final { + return seastar::lowres_system_clock::time_point(); + } + void update_segment_avail_bytes(paddr_t offset) final {} segment_id_t get_segment(device_id_t id, segment_seq_t seq) final { @@ -157,7 +167,7 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider { replay( [&advance, &delta_checker] - (const auto &offsets, const auto &di) mutable { + (const auto &offsets, const auto &di, auto t) mutable { if (!delta_checker) { EXPECT_FALSE("No Deltas Left"); } @@ -193,7 +203,10 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider { char contents = distribution(generator); bufferlist bl; bl.append(buffer::ptr(buffer::create(blocks * block_size, contents))); - return extent_t{extent_types_t::TEST_BLOCK, L_ADDR_NULL, bl}; + return extent_t{ + extent_types_t::TEST_BLOCK, + L_ADDR_NULL, + bl}; } delta_info_t generate_delta(size_t bytes) {