The Extent Placement Manager will also need to encode records, so the record encoding helpers and header types move out of Journal.
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
<< ", physical_segment_id=" << header.physical_segment_id
<< ", journal_tail=" << header.journal_tail
<< ", segment_nonce=" << header.segment_nonce
+ << ", out-of-line=" << header.out_of_line
<< ")";
}
seq,
segment.get_segment_id(),
segment_provider->get_journal_tail_target(),
- current_segment_nonce};
+ current_segment_nonce,
+ false};
encode(header, bl);
bufferptr bp(
});
}
-ceph::bufferlist Journal::encode_record(
- record_size_t rsize,
- record_t &&record)
-{
- bufferlist data_bl;
- for (auto &i: record.extents) {
- data_bl.append(i.bl);
- }
-
- bufferlist bl;
- record_header_t header{
- rsize.mdlength,
- rsize.dlength,
- (uint32_t)record.deltas.size(),
- (uint32_t)record.extents.size(),
- current_segment_nonce,
- committed_to,
- data_bl.crc32c(-1)
- };
- encode(header, bl);
-
- auto metadata_crc_filler = bl.append_hole(sizeof(uint32_t));
-
- for (const auto &i: record.extents) {
- encode(extent_info_t(i), bl);
- }
- for (const auto &i: record.deltas) {
- encode(i, bl);
- }
- auto block_size = segment_manager.get_block_size();
- if (bl.length() % block_size != 0) {
- bl.append_zero(
- block_size - (bl.length() % block_size));
- }
- ceph_assert(bl.length() == rsize.mdlength);
-
-
- auto bliter = bl.cbegin();
- auto metadata_crc = bliter.crc32c(
- ceph::encoded_sizeof_bounded<record_header_t>(),
- -1);
- bliter += sizeof(checksum_t); /* crc hole again */
- metadata_crc = bliter.crc32c(
- bliter.get_remaining(),
- metadata_crc);
- ceph_le32 metadata_crc_le;
- metadata_crc_le = metadata_crc;
- metadata_crc_filler.copy_in(
- sizeof(checksum_t),
- reinterpret_cast<const char *>(&metadata_crc_le));
-
- bl.claim_append(data_bl);
- ceph_assert(bl.length() == (rsize.dlength + rsize.mdlength));
-
- return bl;
-}
-
bool Journal::validate_metadata(const bufferlist &bl)
{
auto bliter = bl.cbegin();
OrderingHandle &handle)
{
ceph::bufferlist to_write = encode_record(
- rsize, std::move(record));
+ rsize, std::move(record), segment_manager.get_block_size(),
+ committed_to, current_segment_nonce);
auto target = written_to;
assert((to_write.length() % segment_manager.get_block_size()) == 0);
written_to += to_write.length();
});
}
-Journal::record_size_t Journal::get_encoded_record_length(
- const record_t &record) const {
- extent_len_t metadata =
- (extent_len_t)ceph::encoded_sizeof_bounded<record_header_t>();
- metadata += sizeof(checksum_t) /* crc */;
- metadata += record.extents.size() *
- ceph::encoded_sizeof_bounded<extent_info_t>();
- extent_len_t data = 0;
- for (const auto &i: record.deltas) {
- metadata += ceph::encoded_sizeof(i);
- }
- for (const auto &i: record.extents) {
- data += i.bl.length();
- }
- metadata = p2roundup(metadata, (extent_len_t)segment_manager.get_block_size());
- return record_size_t{metadata, data};
-}
-
bool Journal::needs_roll(segment_off_t length) const
{
return length + written_to >
namespace crimson::os::seastore {
-using segment_nonce_t = uint32_t;
-
class SegmentProvider;
-/**
- * Segment header
- *
- * Every segment contains and encode segment_header_t in the first block.
- * Our strategy for finding the journal replay point is:
- * 1) Find the segment with the highest journal_segment_seq
- * 2) Replay starting at record located at that segment's journal_tail
- */
-struct segment_header_t {
- segment_seq_t journal_segment_seq;
- segment_id_t physical_segment_id; // debugging
-
- journal_seq_t journal_tail;
- segment_nonce_t segment_nonce;
-
- DENC(segment_header_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.journal_segment_seq, p);
- denc(v.physical_segment_id, p);
- denc(v.journal_tail, p);
- denc(v.segment_nonce, p);
- DENC_FINISH(p);
- }
-};
-std::ostream &operator<<(std::ostream &out, const segment_header_t &header);
-
-struct record_header_t {
- // Fixed portion
- extent_len_t mdlength; // block aligned, length of metadata
- extent_len_t dlength; // block aligned, length of data
- uint32_t deltas; // number of deltas
- uint32_t extents; // number of extents
- segment_nonce_t segment_nonce;// nonce of containing segment
- segment_off_t committed_to; // records in this segment prior to committed_to
- // have been fully written
- checksum_t data_crc; // crc of data payload
-
-
- DENC(record_header_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.mdlength, p);
- denc(v.dlength, p);
- denc(v.deltas, p);
- denc(v.extents, p);
- denc(v.segment_nonce, p);
- denc(v.committed_to, p);
- denc(v.data_crc, p);
- DENC_FINISH(p);
- }
-};
-
-struct extent_info_t {
- extent_types_t type = extent_types_t::NONE;
- laddr_t addr = L_ADDR_NULL;
- extent_len_t len = 0;
-
- extent_info_t() = default;
- extent_info_t(const extent_t &et)
- : type(et.type), addr(et.addr), len(et.bl.length()) {}
-
- DENC(extent_info_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.type, p);
- denc(v.addr, p);
- denc(v.len, p);
- DENC_FINISH(p);
- }
-};
-std::ostream &operator<<(std::ostream &out, const extent_info_t &header);
-
/**
* Manages stream of atomically written records to a SegmentManager.
*/
OrderingHandle &handle
) {
assert(write_pipeline);
- auto rsize = get_encoded_record_length(record);
+ auto rsize = get_encoded_record_length(
+ record, segment_manager.get_block_size());
auto total = rsize.mdlength + rsize.dlength;
if (total > max_record_length()) {
auto &logger = crimson::get_logger(ceph_subsys_seastore);
initialize_segment_ertr::future<segment_seq_t> initialize_segment(
Segment &segment);
- struct record_size_t {
- extent_len_t mdlength = 0;
- extent_len_t dlength = 0;
-
- record_size_t(
- extent_len_t mdlength,
- extent_len_t dlength)
- : mdlength(mdlength), dlength(dlength) {}
- };
-
- /**
- * Return <mdlength, dlength> pair denoting length of
- * metadata and blocks respectively.
- */
- record_size_t get_encoded_record_length(
- const record_t &record) const;
-
- /// create encoded record bl
- ceph::bufferlist encode_record(
- record_size_t rsize,
- record_t &&record);
-
/// validate embedded metadata checksum
static bool validate_metadata(const bufferlist &bl);
using JournalRef = std::unique_ptr<Journal>;
}
-WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_header_t)
-WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_header_t)
-WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::extent_info_t)
namespace crimson::os::seastore {
<< ")";
}
+// Sum the unpadded size of a record's metadata section: the fixed header,
+// the metadata checksum slot, one extent_info_t per extent and every delta.
+// block_size takes no part in this raw (not yet rounded) figure.
+extent_len_t get_encoded_record_raw_mdlength(
+  const record_t &record,
+  size_t block_size) {
+  // fixed header followed by the crc slot
+  extent_len_t raw_md = static_cast<extent_len_t>(
+    ceph::encoded_sizeof_bounded<record_header_t>());
+  raw_md += sizeof(checksum_t);
+  // one bounded-size descriptor per extent
+  const auto extent_count = record.extents.size();
+  raw_md += extent_count * ceph::encoded_sizeof_bounded<extent_info_t>();
+  // deltas are sized one by one
+  for (const auto &delta: record.deltas) {
+    raw_md += ceph::encoded_sizeof(delta);
+  }
+  return raw_md;
+}
+
+// Compute the <metadata, data> lengths of an encoded record.  The metadata
+// length is the raw metadata size passed through p2roundup with block_size;
+// the data length is the combined length of all extent buffers.
+record_size_t get_encoded_record_length(
+  const record_t &record,
+  size_t block_size) {
+  extent_len_t payload_len = 0;
+  for (const auto &extent: record.extents) {
+    payload_len += extent.bl.length();
+  }
+  const extent_len_t padded_md = p2roundup(
+    get_encoded_record_raw_mdlength(record, block_size),
+    (extent_len_t)block_size);
+  return record_size_t{padded_md, payload_len};
+}
+
+// Serialize a record into one bufferlist laid out as
+//   [record_header_t][metadata crc][extent_info_t...][deltas...][zero pad][extent data]
+// rsize must match the lengths actually produced here (see
+// get_encoded_record_length() for the same block_size); the two asserts
+// below enforce that.
+ceph::bufferlist encode_record(
+  record_size_t rsize,
+  record_t &&record,
+  size_t block_size,
+  segment_off_t committed_to,
+  segment_nonce_t current_segment_nonce)
+{
+  // Gather the extent payloads first: the header carries their crc.
+  bufferlist payload;
+  for (auto &extent: record.extents) {
+    payload.append(extent.bl);
+  }
+
+  bufferlist md;
+  record_header_t header{
+    rsize.mdlength,
+    rsize.dlength,
+    (uint32_t)record.deltas.size(),
+    (uint32_t)record.extents.size(),
+    current_segment_nonce,
+    committed_to,
+    payload.crc32c(-1)
+  };
+  encode(header, md);
+
+  // Reserve room for the metadata crc; it is filled in once the rest of
+  // the metadata has been encoded.
+  auto crc_slot = md.append_hole(sizeof(uint32_t));
+
+  for (const auto &extent: record.extents) {
+    encode(extent_info_t(extent), md);
+  }
+  for (const auto &delta: record.deltas) {
+    encode(delta, md);
+  }
+
+  // Zero-fill the metadata up to the next multiple of block_size.
+  const auto tail = md.length() % block_size;
+  if (tail != 0) {
+    md.append_zero(block_size - tail);
+  }
+  ceph_assert(md.length() == rsize.mdlength);
+
+  // The metadata crc covers the header and everything after the crc slot,
+  // skipping the slot itself.
+  auto cursor = md.cbegin();
+  auto md_crc = cursor.crc32c(
+    ceph::encoded_sizeof_bounded<record_header_t>(),
+    -1);
+  cursor += sizeof(checksum_t); /* step over the crc slot */
+  md_crc = cursor.crc32c(cursor.get_remaining(), md_crc);
+  ceph_le32 md_crc_le;
+  md_crc_le = md_crc;
+  crc_slot.copy_in(
+    sizeof(checksum_t),
+    reinterpret_cast<const char *>(&md_crc_le));
+
+  // Extent data follows the padded metadata.
+  md.claim_append(payload);
+  ceph_assert(md.length() == (rsize.dlength + rsize.mdlength));
+
+  return md;
+}
+
}
op_types_t op;
};
+struct extent_info_t { // describes one extent of a record: its type, address and length
+ extent_types_t type = extent_types_t::NONE;
+ laddr_t addr = L_ADDR_NULL;
+ extent_len_t len = 0; // length of the extent's buffer (et.bl.length() when built from an extent_t)
+ // The default constructor leaves the NONE / L_ADDR_NULL / 0 placeholders above.
+ extent_info_t() = default;
+ extent_info_t(const extent_t &et) // copies type and addr, keeps only the buffer's length
+ : type(et.type), addr(et.addr), len(et.bl.length()) {}
+ // Encoded as type, addr, len, in that order.
+ DENC(extent_info_t, v, p) {
+ DENC_START(1, 1, p);
+ denc(v.type, p);
+ denc(v.addr, p);
+ denc(v.len, p);
+ DENC_FINISH(p);
+ }
+};
+
+using segment_nonce_t = uint32_t;
+
+/**
+ * Segment header
+ *
+ * Every segment contains an encoded segment_header_t in its first block.
+ * Our strategy for finding the journal replay point is:
+ * 1) Find the segment with the highest journal_segment_seq
+ * 2) Replay starting at the record located at that segment's journal_tail
+ */
+struct segment_header_t {
+ segment_seq_t journal_segment_seq;
+ segment_id_t physical_segment_id; // debugging
+
+ journal_seq_t journal_tail;
+ segment_nonce_t segment_nonce;
+ bool out_of_line; // printed as "out-of-line" by operator<<; NOTE(review): presumably marks segments written outside the journal -- confirm
+ // Fields are encoded in declaration order.
+ DENC(segment_header_t, v, p) {
+ DENC_START(1, 1, p); // NOTE(review): out_of_line is a new encoded field but these version numbers are unchanged -- confirm no on-disk compatibility is needed
+ denc(v.journal_segment_seq, p);
+ denc(v.physical_segment_id, p);
+ denc(v.journal_tail, p);
+ denc(v.segment_nonce, p);
+ denc(v.out_of_line, p);
+ DENC_FINISH(p);
+ }
+};
+std::ostream &operator<<(std::ostream &out, const segment_header_t &header);
+
+struct record_header_t { // bounded-size header that encode_record() writes at the start of a record's metadata
+ // Fixed portion
+ extent_len_t mdlength; // block aligned, length of metadata
+ extent_len_t dlength; // block aligned, length of data
+ uint32_t deltas; // number of deltas
+ uint32_t extents; // number of extents
+ segment_nonce_t segment_nonce;// nonce of containing segment
+ segment_off_t committed_to; // records in this segment prior to committed_to
+ // have been fully written
+ checksum_t data_crc; // crc of data payload
+
+ // Fields are encoded in declaration order.
+ DENC(record_header_t, v, p) {
+ DENC_START(1, 1, p);
+ denc(v.mdlength, p);
+ denc(v.dlength, p);
+ denc(v.deltas, p);
+ denc(v.extents, p);
+ denc(v.segment_nonce, p);
+ denc(v.committed_to, p);
+ denc(v.data_crc, p);
+ DENC_FINISH(p);
+ }
+};
+
+std::ostream &operator<<(std::ostream &out, const extent_info_t &header);
+
+struct record_size_t { // <mdlength, dlength> pair produced by get_encoded_record_length()
+ extent_len_t mdlength = 0; // length of the record's metadata section
+ extent_len_t dlength = 0; // length of the record's data section
+ // The only constructor: both lengths must be supplied by the caller.
+ record_size_t(
+ extent_len_t mdlength,
+ extent_len_t dlength)
+ : mdlength(mdlength), dlength(dlength) {}
+};
+
+extent_len_t get_encoded_record_raw_mdlength( // metadata length of the record before any rounding to block_size
+ const record_t &record,
+ size_t block_size); // not read by the current definition
+
+/**
+ * Return a <mdlength, dlength> pair: the metadata length rounded up
+ * to block_size and the total length of the record's extent data.
+ */
+record_size_t get_encoded_record_length(
+ const record_t &record,
+ size_t block_size);
+/// create encoded record bl: block-padded metadata followed by the extent data
+ceph::bufferlist encode_record(
+ record_size_t rsize, // lengths the encoding is asserted against
+ record_t &&record,
+ size_t block_size, // metadata is zero-padded to a multiple of this
+ segment_off_t committed_to, // stored in the record header
+ segment_nonce_t current_segment_nonce = 0); // stored in the record header; 0 when omitted
+
}
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::seastore_meta_t)
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::paddr_t)
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::journal_seq_t)
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::delta_info_t)
+WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_header_t)
+WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::extent_info_t)
+WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_header_t)