// Note: make sure no one can access the current segment once closing
auto seg_to_close = std::move(current_segment);
auto close_segment_id = seg_to_close->get_segment_id();
- segment_provider.close_segment(close_segment_id);
auto close_seg_info = segment_provider.get_seg_info(close_segment_id);
ceph_assert((close_seg_info.modify_time == NULL_TIME &&
close_seg_info.num_extents == 0) ||
bl.append(bp);
assert(bl.length() == sm_group.get_rounded_tail_length());
- return seg_to_close->write(
- sm_group.get_segment_size() - sm_group.get_rounded_tail_length(),
- bl
- ).safe_then([seg_to_close=std::move(seg_to_close)] {
- return seg_to_close->close();
+
+ auto p_seg_to_close = seg_to_close.get();
+ return p_seg_to_close->advance_wp(
+ sm_group.get_segment_size() - sm_group.get_rounded_tail_length()
+ ).safe_then([this, FNAME, bl=std::move(bl), p_seg_to_close]() mutable {
+ DEBUG("Writing tail info to segment {}", p_seg_to_close->get_segment_id());
+ return p_seg_to_close->write(
+ sm_group.get_segment_size() - sm_group.get_rounded_tail_length(),
+ std::move(bl));
+ }).safe_then([p_seg_to_close] {
+ return p_seg_to_close->close();
+ }).safe_then([this, seg_to_close=std::move(seg_to_close)] {
+ segment_provider.close_segment(seg_to_close->get_segment_id());
}).handle_error(
close_segment_ertr::pass_further{},
- crimson::ct_error::assert_all{
- "Invalid error in SegmentAllocator::close_segment"
- }
- );
+ crimson::ct_error::assert_all {
+ "Invalid error in SegmentAllocator::close_segment"
+ });
+
}
RecordBatch::add_pending_ret
virtual write_ertr::future<> write(
seastore_off_t offset, ceph::bufferlist bl) = 0;
+  /**
+   * advance_wp
+   *
+   * Advance the segment write pointer to the given offset (e.g. by writing
+   * zero padding). Needed on media where writes must land exactly at the
+   * write pointer -- e.g. ZNS-backed segments; may be a no-op elsewhere.
+   * @param offset: offset to advance the write pointer to
+   */
+  virtual write_ertr::future<> advance_wp(
+    seastore_off_t offset) = 0;
+
virtual ~Segment() {}
};
using SegmentRef = boost::intrusive_ptr<Segment>;
return manager.segment_write(paddr, bl);
}
+// No-op for conventional block devices: writes may land at any offset, so
+// there is no hardware write pointer that needs to be moved forward.
+Segment::write_ertr::future<> BlockSegment::advance_wp(
+  seastore_off_t offset) {
+  return write_ertr::now();
+}
+
Segment::close_ertr::future<> BlockSegmentManager::segment_close(
segment_id_t id, seastore_off_t write_pointer)
{
seastore_off_t get_write_ptr() const final { return write_pointer; }
close_ertr::future<> close() final;
write_ertr::future<> write(seastore_off_t offset, ceph::bufferlist bl) final;
+ write_ertr::future<> advance_wp(seastore_off_t offset) final;
~BlockSegment() {}
};
return manager.segment_write(paddr_t::make_seg_paddr(id, offset), bl);
}
+// No-op for the in-memory (ephemeral) backend: there is no write-pointer
+// constraint, so advancing it requires no work.
+Segment::write_ertr::future<> EphemeralSegment::advance_wp(
+  seastore_off_t offset)
+{
+  return write_ertr::now();
+}
+
Segment::close_ertr::future<> EphemeralSegmentManager::segment_close(segment_id_t id)
{
auto s_id = id.device_segment_id();
seastore_off_t get_write_ptr() const final { return write_pointer; }
close_ertr::future<> close() final;
write_ertr::future<> write(seastore_off_t offset, ceph::bufferlist bl) final;
+ write_ertr::future<> advance_wp(seastore_off_t offset) final;
~EphemeralSegment() {}
};
#define SECT_SHIFT 9
#define RESERVED_ZONES 1
+// limit the max padding buf size to 1MB
+#define MAX_PADDING_SIZE 1048576
namespace crimson::os::seastore::segment_manager::zns {
);
}
).safe_then([=] {
- DEBUG("segment open successful");
+ DEBUG("segment {}, open successful", id);
return open_ertr::future<SegmentRef>(
open_ertr::ready_future_marker{},
SegmentRef(new ZNSSegment(*this, id))
{
LOG_PREFIX(ZNSSegment::write);
if (offset != write_pointer || offset % manager.metadata.block_size != 0) {
- ERROR("Invalid segment write on segment {} to offset {}",
- id,
- offset);
+ ERROR("Segment offset and zone write pointer mismatch. "
+ "segment {} segment-offset {} write pointer {}",
+ id, offset, write_pointer);
return crimson::ct_error::invarg::make();
}
if (offset + bl.length() > manager.metadata.segment_size)
return manager.segment_write(paddr_t::make_seg_paddr(id, offset), bl);
}
+// Zero-fill the zone forward from the current write pointer by
+// padding_bytes.  The work is chunked into buffers of at most
+// MAX_PADDING_SIZE (1MB) to bound per-iteration memory usage.
+Segment::write_ertr::future<> ZNSSegment::write_padding_bytes(
+  size_t padding_bytes)
+{
+  LOG_PREFIX(ZNSSegment::write_padding_bytes);
+  DEBUG("Writing {} padding bytes to segment {} at wp {}",
+    padding_bytes, id, write_pointer);
+
+  // padding_bytes is captured by value in a mutable lambda, so the remaining
+  // byte count persists (and is decremented) across repeat iterations.
+  return crimson::repeat([FNAME, padding_bytes, this] () mutable {
+    // Write at most MAX_PADDING_SIZE per iteration.
+    size_t bufsize = 0;
+    if (padding_bytes >= MAX_PADDING_SIZE) {
+      bufsize = MAX_PADDING_SIZE;
+    } else {
+      bufsize = padding_bytes;
+    }
+
+    padding_bytes -= bufsize;
+    // Page-aligned zeroed buffer for this chunk.
+    bufferptr bp(ceph::buffer::create_page_aligned(bufsize));
+    bp.zero();
+    bufferlist padd_bl;
+    padd_bl.append(bp);
+    // NOTE(review): each iteration writes at the member write_pointer --
+    // assumes write()/segment_write advances write_pointer per chunk, which
+    // is not visible in this hunk; confirm, otherwise every chunk would
+    // target the same offset and be rejected by the wp check in write().
+    return write(write_pointer, padd_bl).safe_then([FNAME, padding_bytes, this]() {
+      // Stop once the captured remaining-byte count reaches zero.
+      if (padding_bytes == 0) {
+        return write_ertr::make_ready_future<seastar::stop_iteration>(seastar::stop_iteration::yes);
+      } else {
+        return write_ertr::make_ready_future<seastar::stop_iteration>(seastar::stop_iteration::no);
+      }
+    });
+  });
+}
+
+// Advance the zone write pointer to the given offset by writing zero
+// padding: ZNS zones only accept writes at the current write pointer, so
+// "seeking" forward means physically filling the gap.
+Segment::write_ertr::future<> ZNSSegment::advance_wp(
+  seastore_off_t offset)
+{
+  LOG_PREFIX(ZNSSegment::advance_wp);
+
+  DEBUG("Advancing write pointer from {} to {}", write_pointer, offset);
+  // The write pointer can never move backwards on zoned media.
+  if (offset < write_pointer) {
+    return crimson::ct_error::invarg::make();
+  }
+
+  size_t padding_bytes = offset - write_pointer;
+
+  // Already at the requested offset -- nothing to write.
+  if (padding_bytes == 0) {
+    return write_ertr::now();
+  }
+
+  // write() rejects offsets that are not block aligned, so the padding must
+  // be a whole number of blocks.
+  assert(padding_bytes % manager.metadata.block_size == 0);
+
+  return write_padding_bytes(padding_bytes);
+}
+
}
seastore_off_t get_write_ptr() const final { return write_pointer; }
close_ertr::future<> close() final;
write_ertr::future<> write(seastore_off_t offset, ceph::bufferlist bl) final;
+ write_ertr::future<> advance_wp(seastore_off_t offset) final;
~ZNSSegment() {}
private:
ZNSSegmentManager &manager;
const segment_id_t id;
seastore_off_t write_pointer = 0;
+ write_ertr::future<> write_padding_bytes(size_t padding_bytes);
};
class ZNSSegmentManager final : public SegmentManager{