SegmentedAllocator::Writer::write_iertr::future<>
SegmentedAllocator::Writer::_write(
Transaction& t,
- ool_record_t& record)
+ record_t&& record,
+ std::list<LogicalCachedExtentRef>&& extents)
{
LOG_PREFIX(SegmentedAllocator::Writer::_write);
- record.set_base(segment_allocator.get_written_to());
- auto record_size = record.get_encoded_record_length();
- bufferlist bl = record.encode(
- segment_allocator.get_segment_id(),
- segment_allocator.get_nonce());
+ assert(extents.size());
+ assert(extents.size() == record.extents.size());
+ assert(!record.deltas.size());
+ auto record_group = record_group_t(
+ std::move(record), segment_allocator.get_block_size());
+ auto record_size = record_group.size;
+ ceph::bufferlist bl = encode_records(
+ record_group,
+ JOURNAL_SEQ_NULL,
+ segment_allocator.get_nonce()); // 0
assert(bl.length() == record_size.get_encoded_length());
- DEBUGT(
- "written {} extents, {} bytes to segment {} at {}",
- t,
- record.get_num_extents(),
- bl.length(),
- segment_allocator.get_segment_id(),
- record.get_base());
+ DEBUGT("writing {} bytes to segment {}",
+ t, bl.length(), segment_allocator.get_segment_id());
// account transactional ool writes before write()
auto& stats = t.get_ool_write_stats();
- stats.extents.num += record.get_num_extents();
+ stats.extents.num += extents.size();
stats.extents.bytes += record_size.dlength;
stats.header_raw_bytes += record_size.get_raw_mdlength();
stats.header_bytes += record_size.get_mdlength();
stats.data_bytes += record_size.dlength;
stats.num_records += 1;
- for (auto& ool_extent : record.get_extents()) {
- auto& lextent = ool_extent.get_lextent();
- auto paddr = ool_extent.get_ool_paddr();
- TRACET("ool extent written at {} -- {}", t, *lextent, paddr);
- lextent->hint = placement_hint_t::NUM_HINTS; // invalidate hint
- t.mark_delayed_extent_ool(lextent, paddr);
- }
-
return trans_intr::make_interruptible(
- segment_allocator.write(bl).discard_result()
- );
+ segment_allocator.write(bl)
+ ).si_then([FNAME, record_size, &t,
+ extents=std::move(extents)](write_result_t wr) mutable {
+ assert(wr.start_seq.segment_seq == OOL_SEG_SEQ);
+ paddr_t extent_addr = wr.start_seq.offset;
+ extent_addr = extent_addr.as_seg_paddr().add_offset(
+ record_size.get_mdlength());
+ for (auto& extent : extents) {
+ TRACET("ool extent written at {} -- {}", t, *extent, extent_addr);
+ extent->hint = placement_hint_t::NUM_HINTS; // invalidate hint
+ t.mark_delayed_extent_ool(extent, extent_addr);
+ extent_addr = extent_addr.as_seg_paddr().add_offset(
+ extent->get_length());
+ }
+ assert(extent_addr == wr.get_end_seq().offset);
+ });
}
SegmentedAllocator::Writer::write_iertr::future<>
}
assert(segment_allocator.can_write());
- ool_record_t record(
- segment_allocator.get_block_size(),
- (t.get_src() == Transaction::src_t::MUTATE)
- ? record_commit_type_t::MODIFY
- : record_commit_type_t::REWRITE);
+ record_t record;
+ std::list<LogicalCachedExtentRef> pending_extents;
+
+ auto commit_time = seastar::lowres_system_clock::now();
+ record_commit_type_t commit_type;
+ if (t.get_src() == Transaction::src_t::MUTATE) {
+ commit_type = record_commit_type_t::MODIFY;
+ } else {
+ assert(t.get_src() == Transaction::src_t::CLEANER_TRIM ||
+ t.get_src() == Transaction::src_t::CLEANER_RECLAIM);
+ commit_type = record_commit_type_t::REWRITE;
+ }
+ record.commit_time = commit_time.time_since_epoch().count();
+ record.commit_type = commit_type;
+
for (auto it = extents.begin(); it != extents.end();) {
auto& extent = *it;
- auto wouldbe_length = record.get_wouldbe_encoded_record_length(extent);
+ record_size_t wouldbe_rsize = record.size;
+ wouldbe_rsize.account_extent(extent->get_bptr().length());
+ auto wouldbe_length = record_group_size_t(
+ wouldbe_rsize, segment_allocator.get_block_size()
+ ).get_encoded_length();
if (segment_allocator.needs_roll(wouldbe_length)) {
// reached the segment end, write and roll
assert(!roll_promise.has_value());
roll_promise = seastar::shared_promise<>();
- auto num_extents = record.get_num_extents();
+ auto num_extents = pending_extents.size();
DEBUGT("end of segment, writing {} extents", t, num_extents);
return (num_extents ?
- _write(t, record) :
+ _write(t, std::move(record), std::move(pending_extents)) :
write_iertr::now()
).si_then([this] {
return segment_allocator.roll();
return write_iertr::now();
});
}
+
DEBUGT("add extent to record -- {}", t, *extent);
+ if (commit_type == record_commit_type_t::MODIFY) {
+ extent->set_last_modified(commit_time);
+ } else {
+ assert(commit_type == record_commit_type_t::REWRITE);
+ extent->set_last_rewritten(commit_time);
+ }
+ ceph::bufferlist bl;
extent->prepare_write();
- record.add_extent(extent);
+ bl.append(extent->get_bptr());
+ assert(bl.length() == extent->get_length());
+ record.push_back(extent_t{
+ extent->get_type(),
+ extent->get_laddr(),
+ std::move(bl),
+ extent->get_last_modified().time_since_epoch().count()});
+ pending_extents.push_back(extent);
it = extents.erase(it);
}
- DEBUGT("writing {} extents", t, record.get_num_extents());
- return _write(t, record);
+ DEBUGT("writing {} extents", t, pending_extents.size());
+ return _write(t, std::move(record), std::move(pending_extents));
}
SegmentedAllocator::Writer::write_iertr::future<>
namespace crimson::os::seastore {
-/**
- * ool_record_t
- *
- * Encapsulates logic for building and encoding an ool record destined for
- * an ool segment.
- *
- * Uses a metadata header to enable scanning the ool segment for gc purposes.
- * Introducing a seperate physical->logical mapping would enable removing the
- * metadata block overhead.
- */
-class ool_record_t {
- class OolExtent {
- public:
- OolExtent(LogicalCachedExtentRef& lextent)
- : lextent(lextent) {}
-
- void set_ool_paddr(paddr_t addr) {
- ool_offset = addr;
- }
- paddr_t get_ool_paddr() const {
- return ool_offset;
- }
- bufferptr& get_bptr() {
- return lextent->get_bptr();
- }
- LogicalCachedExtentRef& get_lextent() {
- return lextent;
- }
- private:
- paddr_t ool_offset;
- LogicalCachedExtentRef lextent;
- };
-
-public:
- ool_record_t(
- size_t block_size,
- record_commit_type_t commit_type)
- : block_size(block_size),
- commit_type(commit_type) {}
- record_group_size_t get_encoded_record_length() {
- assert(extents.size() == record.extents.size());
- return record_group_size_t(record.size, block_size);
- }
- size_t get_wouldbe_encoded_record_length(LogicalCachedExtentRef& extent) {
- record_size_t rsize = record.size;
- rsize.account_extent(extent->get_bptr().length());
- return record_group_size_t(rsize, block_size).get_encoded_length();
- }
- ceph::bufferlist encode(segment_id_t segment,
- segment_nonce_t nonce) {
- assert(extents.size() == record.extents.size());
- assert(!record.deltas.size());
- auto commit_time = seastar::lowres_system_clock::now();
- record.commit_time = commit_time.time_since_epoch().count();
- record.commit_type = commit_type;
- auto record_group = record_group_t(std::move(record), block_size);
- seastore_off_t extent_offset = base + record_group.size.get_mdlength();
- for (auto& extent : extents) {
- extent.set_ool_paddr(
- paddr_t::make_seg_paddr(segment, extent_offset));
- if (commit_type == record_commit_type_t::MODIFY) {
- extent.get_lextent()->set_last_modified(commit_time);
- } else {
- assert(commit_type == record_commit_type_t::REWRITE);
- extent.get_lextent()->set_last_rewritten(commit_time);
- }
- extent_offset += extent.get_bptr().length();
- }
- assert(extent_offset ==
- (seastore_off_t)(base + record_group.size.get_encoded_length()));
- return encode_records(record_group, JOURNAL_SEQ_NULL, nonce);
- }
- void add_extent(LogicalCachedExtentRef& extent) {
- extents.emplace_back(extent);
- ceph::bufferlist bl;
- bl.append(extent->get_bptr());
- record.push_back(extent_t{
- extent->get_type(),
- extent->get_laddr(),
- std::move(bl),
- extent->get_last_modified().time_since_epoch().count()});
- }
- std::vector<OolExtent>& get_extents() {
- return extents;
- }
- void set_base(seastore_off_t b) {
- base = b;
- }
- seastore_off_t get_base() const {
- return base;
- }
- void clear() {
- record = {};
- extents.clear();
- base = MAX_SEG_OFF;
- }
- uint64_t get_num_extents() const {
- return extents.size();
- }
-
-private:
- std::vector<OolExtent> extents;
- record_t record;
- size_t block_size;
- seastore_off_t base = MAX_SEG_OFF;
- record_commit_type_t commit_type =
- record_commit_type_t::NONE;
-};
-
/**
* ExtentOolWriter
*
write_iertr::future<> _write(
Transaction& t,
- ool_record_t& record);
+ record_t&& record,
+ std::list<LogicalCachedExtentRef>&& extents);
journal::SegmentAllocator segment_allocator;
std::optional<seastar::shared_promise<>> roll_promise;