using context_t = ObjectDataHandler::context_t;
using get_iertr = ObjectDataHandler::write_iertr;
-/**
- * extent_to_write_t
- *
- * Encapsulates smallest write operations in overwrite.
- * Indicates a zero/existing extent or a data extent based on whether
- * to_write is populate.
- * Should be handled by prepare_ops_list.
- */
-struct extent_to_write_t {
- enum class type_t {
- DATA,
- ZERO,
- EXISTING,
- };
- type_t type;
-
- /// pin of original extent, not std::nullopt if type == EXISTING
- std::optional<LBAMapping> pin;
-
- laddr_t addr;
- extent_len_t len;
-
- /// non-nullopt if and only if type == DATA
- std::optional<bufferlist> to_write;
-
- extent_to_write_t(const extent_to_write_t &) = delete;
- extent_to_write_t(extent_to_write_t &&) = default;
- extent_to_write_t& operator=(const extent_to_write_t&) = delete;
- extent_to_write_t& operator=(extent_to_write_t&&) = default;
-
- bool is_data() const {
- return type == type_t::DATA;
- }
-
- bool is_zero() const {
- return type == type_t::ZERO;
- }
-
- bool is_existing() const {
- return type == type_t::EXISTING;
- }
-
- laddr_t get_end_addr() const {
- return (addr + len).checked_to_laddr();
- }
-
- static extent_to_write_t create_data(
- laddr_t addr, bufferlist to_write) {
- return extent_to_write_t(addr, to_write);
- }
-
- static extent_to_write_t create_zero(
- laddr_t addr, extent_len_t len) {
- return extent_to_write_t(addr, len);
- }
-
- static extent_to_write_t create_existing(
- LBAMapping &&pin, laddr_t addr, extent_len_t len) {
- return extent_to_write_t(std::move(pin), addr, len);
- }
-
-private:
- extent_to_write_t(laddr_t addr, bufferlist to_write)
- : type(type_t::DATA), addr(addr), len(to_write.length()),
- to_write(to_write) {}
-
- extent_to_write_t(laddr_t addr, extent_len_t len)
- : type(type_t::ZERO), addr(addr), len(len) {}
-
- extent_to_write_t(LBAMapping &&pin, laddr_t addr, extent_len_t len)
- : type(type_t::EXISTING), pin(std::move(pin)), addr(addr), len(len) {}
-};
-using extent_to_write_list_t = std::list<extent_to_write_t>;
-
-// Encapsulates extents to be written out using do_remappings.
-struct extent_to_remap_t {
- enum class type_t {
- REMAP1,
- REMAP2,
- OVERWRITE
- };
- type_t type;
- /// pin of original extent
- LBAMapping pin;
- /// offset of remapped extent or overwrite part of overwrite extent.
- /// overwrite part of overwrite extent might correspond to mutiple
- /// fresh write extent.
- extent_len_t new_offset;
- /// length of remapped extent or overwrite part of overwrite extent
- extent_len_t new_len;
-
- extent_to_remap_t(const extent_to_remap_t &) = delete;
- extent_to_remap_t(extent_to_remap_t &&) = default;
-
- bool is_remap1() const {
- return type == type_t::REMAP1;
- }
-
- bool is_remap2() const {
- assert((new_offset != 0) && (pin.get_length() != new_offset + new_len));
- return type == type_t::REMAP2;
- }
-
- bool is_overwrite() const {
- return type == type_t::OVERWRITE;
- }
-
- using remap_entry_t = TransactionManager::remap_entry_t;
- remap_entry_t create_remap_entry() {
- assert(is_remap1());
- return remap_entry_t(
- new_offset,
- new_len);
- }
-
- remap_entry_t create_left_remap_entry() {
- assert(is_remap2());
- return remap_entry_t(
- 0,
- new_offset);
- }
-
- remap_entry_t create_right_remap_entry() {
- assert(is_remap2());
- return remap_entry_t(
- new_offset + new_len,
- pin.get_length() - new_offset - new_len);
- }
-
- static extent_to_remap_t create_remap1(
- LBAMapping &&pin, extent_len_t new_offset, extent_len_t new_len) {
- return extent_to_remap_t(type_t::REMAP1,
- std::move(pin), new_offset, new_len);
- }
-
- static extent_to_remap_t create_remap2(
- LBAMapping &&pin, extent_len_t new_offset, extent_len_t new_len) {
- return extent_to_remap_t(type_t::REMAP2,
- std::move(pin), new_offset, new_len);
- }
-
- static extent_to_remap_t create_overwrite(
- extent_len_t new_offset, extent_len_t new_len, LBAMapping p,
- bufferlist b) {
- auto key = p.get_key();
- auto len = p.get_length();
- return extent_to_remap_t(type_t::OVERWRITE, std::move(p),
- new_offset, new_len, key, len, b);
- }
-
- laddr_t laddr_start;
- extent_len_t length;
- std::optional<bufferlist> bl;
-
-private:
- extent_to_remap_t(type_t type,
- LBAMapping &&pin, extent_len_t new_offset, extent_len_t new_len)
- : type(type),
- pin(std::move(pin)), new_offset(new_offset), new_len(new_len) {}
- extent_to_remap_t(type_t type, LBAMapping &&pin,
- extent_len_t new_offset, extent_len_t new_len,
- laddr_t ori_laddr, extent_len_t ori_len, std::optional<bufferlist> b)
- : type(type), pin(std::move(pin)),
- new_offset(new_offset), new_len(new_len),
- laddr_start(ori_laddr), length(ori_len), bl(b) {}
-};
-using extent_to_remap_list_t = std::list<extent_to_remap_t>;
-
-// Encapsulates extents to be written out using do_insertions.
-struct extent_to_insert_t {
- enum class type_t {
- DATA,
- ZERO
- };
- type_t type;
- /// laddr of new extent
- laddr_t addr;
- /// length of new extent
- extent_len_t len;
- /// non-nullopt if type == DATA
- std::optional<bufferlist> bl;
-
- extent_to_insert_t(const extent_to_insert_t &) = default;
- extent_to_insert_t(extent_to_insert_t &&) = default;
-
- bool is_data() const {
- return type == type_t::DATA;
- }
-
- bool is_zero() const {
- return type == type_t::ZERO;
- }
-
- static extent_to_insert_t create_data(
- laddr_t addr, extent_len_t len, std::optional<bufferlist> bl) {
- return extent_to_insert_t(addr, len, bl);
- }
-
- static extent_to_insert_t create_zero(
- laddr_t addr, extent_len_t len) {
- return extent_to_insert_t(addr, len);
- }
-
-private:
- extent_to_insert_t(laddr_t addr, extent_len_t len,
- std::optional<bufferlist> bl)
- :type(type_t::DATA), addr(addr), len(len), bl(bl) {}
-
- extent_to_insert_t(laddr_t addr, extent_len_t len)
- :type(type_t::ZERO), addr(addr), len(len) {}
-};
-using extent_to_insert_list_t = std::list<extent_to_insert_t>;
-
-// Encapsulates extents to be retired in do_removals.
-using extent_to_remove_list_t = std::list<LBAMapping>;
-
-struct overwrite_ops_t {
- extent_to_remap_list_t to_remap;
- extent_to_insert_list_t to_insert;
- extent_to_remove_list_t to_remove;
-};
-
-// prepare to_remap, to_retire, to_insert list
-overwrite_ops_t prepare_ops_list(
- lba_mapping_list_t &pins_to_remove,
- extent_to_write_list_t &to_write,
- size_t delta_based_overwrite_max_extent_size) {
- assert(pins_to_remove.size() != 0);
- overwrite_ops_t ops;
- ops.to_remove.swap(pins_to_remove);
- if (to_write.empty()) {
- logger().debug("empty to_write");
- return ops;
- }
- long unsigned int visitted = 0;
- auto& front = to_write.front();
- auto& back = to_write.back();
-
- // prepare overwrite, happens in one original extent.
- if (ops.to_remove.size() == 1 &&
- front.is_existing() && back.is_existing()) {
- visitted += 2;
- assert(to_write.size() > 2);
- assert(front.pin);
- assert(front.addr == front.pin->get_key());
- assert(back.addr > back.pin->get_key());
- ops.to_remap.push_back(extent_to_remap_t::create_remap2(
- std::move(*front.pin),
- front.len,
- back.addr.get_byte_distance<extent_len_t>(front.addr) - front.len));
- ops.to_remove.pop_front();
- } else {
- // prepare to_remap, happens in one or multiple extents
- if (front.is_existing()) {
- visitted++;
- assert(to_write.size() > 1);
- assert(front.pin);
- assert(front.addr == front.pin->get_key());
- ops.to_remap.push_back(extent_to_remap_t::create_remap1(
- std::move(*front.pin),
- 0,
- front.len));
- ops.to_remove.pop_front();
- }
- if (back.is_existing()) {
- visitted++;
- assert(to_write.size() > 1);
- assert(back.pin);
- assert(back.addr + back.len ==
- back.pin->get_key() + back.pin->get_length());
- auto key = back.pin->get_key();
- ops.to_remap.push_back(extent_to_remap_t::create_remap1(
- std::move(*back.pin),
- back.addr.get_byte_distance<extent_len_t>(key),
- back.len));
- ops.to_remove.pop_back();
- }
- }
-
- laddr_interval_set_t pre_alloc_addr_removed, pre_alloc_addr_remapped;
- if (delta_based_overwrite_max_extent_size) {
- for (auto &r : ops.to_remove) {
- if (r.is_data_stable() && !r.is_zero_reserved()) {
- pre_alloc_addr_removed.insert(r.get_key(), r.get_length());
-
- }
- }
- for (auto &r : ops.to_remap) {
- if (r.pin.is_data_stable() && !r.pin.is_zero_reserved()) {
- pre_alloc_addr_remapped.insert(r.pin.get_key(), r.pin.get_length());
- }
- }
- }
-
- // prepare to insert
- extent_to_remap_list_t to_remap;
- for (auto ®ion : to_write) {
- if (region.is_data()) {
- visitted++;
- assert(region.to_write.has_value());
- int erased_num = 0;
- if (pre_alloc_addr_removed.contains(region.addr, region.len) &&
- region.len <= delta_based_overwrite_max_extent_size) {
- erased_num = std::erase_if(
- ops.to_remove,
- [®ion, &to_remap](auto &r) {
- laddr_interval_set_t range;
- range.insert(r.get_key(), r.get_length());
- if (range.contains(region.addr, region.len) && !r.is_clone()) {
- to_remap.push_back(extent_to_remap_t::create_overwrite(
- 0, region.len, std::move(r), *region.to_write));
- return true;
- }
- return false;
- });
- // if the size of the region is wider than the ragne from the enry in to_remove,
- // we create a separated extent in the original way.
- } else if (pre_alloc_addr_remapped.contains(region.addr, region.len) &&
- region.len <= delta_based_overwrite_max_extent_size) {
- erased_num = std::erase_if(
- ops.to_remap,
- [®ion, &to_remap](auto &r) {
- laddr_interval_set_t range;
- range.insert(r.pin.get_key(), r.pin.get_length());
- if (range.contains(region.addr, region.len) && !r.pin.is_clone()) {
- to_remap.push_back(extent_to_remap_t::create_overwrite(
- region.addr.get_byte_distance<
- extent_len_t> (range.begin().get_start()),
- region.len, std::move(r.pin), *region.to_write));
- return true;
- }
- return false;
- });
- assert(erased_num > 0);
- }
- if (erased_num == 0) {
- ops.to_insert.push_back(extent_to_insert_t::create_data(
- region.addr, region.len, region.to_write));
- }
- } else if (region.is_zero()) {
- visitted++;
- assert(!(region.to_write.has_value()));
- ops.to_insert.push_back(extent_to_insert_t::create_zero(
- region.addr, region.len));
- }
- }
- ops.to_remap.splice(ops.to_remap.end(), to_remap);
-
- logger().debug(
- "to_remap list size: {}"
- " to_insert list size: {}"
- " to_remove list size: {}",
- ops.to_remap.size(), ops.to_insert.size(), ops.to_remove.size());
- assert(visitted == to_write.size());
- return ops;
-}
-
-/**
- * append_extent_to_write
- *
- * Appends passed extent_to_write_t maintaining invariant that the
- * list may not contain consecutive zero elements by checking and
- * combining them.
- */
-void append_extent_to_write(
- extent_to_write_list_t &to_write, extent_to_write_t &&to_append)
-{
- assert(to_write.empty() ||
- to_write.back().get_end_addr() == to_append.addr);
- if (to_write.empty() ||
- to_write.back().is_data() ||
- to_append.is_data() ||
- to_write.back().type != to_append.type) {
- to_write.push_back(std::move(to_append));
- } else {
- to_write.back().len += to_append.len;
- }
-}
-
-/**
- * splice_extent_to_write
- *
- * splices passed extent_to_write_list_t maintaining invariant that the
- * list may not contain consecutive zero elements by checking and
- * combining them.
- */
-void splice_extent_to_write(
- extent_to_write_list_t &to_write, extent_to_write_list_t &&to_splice)
-{
- if (!to_splice.empty()) {
- append_extent_to_write(to_write, std::move(to_splice.front()));
- to_splice.pop_front();
- to_write.splice(to_write.end(), std::move(to_splice));
- }
-}
-
ceph::bufferlist ObjectDataBlock::get_delta() {
ceph::bufferlist bl;
encode(delta, bl);
}
}
-/// Creates remap extents in to_remap
-ObjectDataHandler::write_ret do_remappings(
- context_t ctx,
- extent_to_remap_list_t &to_remap)
-{
- return trans_intr::do_for_each(
- to_remap,
- [ctx](auto ®ion) {
- if (region.is_remap1()) {
- return ctx.tm.remap_pin<ObjectDataBlock, 1>(
- ctx.t,
- std::move(region.pin),
- std::array{
- region.create_remap_entry()
- }
- ).si_then([®ion](auto pins) {
- ceph_assert(pins.size() == 1);
- ceph_assert(region.new_len == pins[0].get_length());
- return ObjectDataHandler::write_iertr::now();
- });
- } else if (region.is_overwrite()) {
- return ctx.tm.get_mutable_extent_by_laddr<ObjectDataBlock>(
- ctx.t,
- region.laddr_start,
- region.length
- ).handle_error_interruptible(
- TransactionManager::base_iertr::pass_further{},
- crimson::ct_error::assert_all{
- "ObjectDataHandler::do_remapping hit invalid error"
- }
- ).si_then([®ion](auto extent) {
- extent_len_t off = region.new_offset;
- assert(region.bl->length() == region.new_len);
- extent->overwrite(off, *region.bl);
- return ObjectDataHandler::write_iertr::now();
- });
- } else if (region.is_remap2()) {
- auto pin_key = region.pin.get_key();
- return ctx.tm.remap_pin<ObjectDataBlock, 2>(
- ctx.t,
- std::move(region.pin),
- std::array{
- region.create_left_remap_entry(),
- region.create_right_remap_entry()
- }
- ).si_then([®ion, pin_key](auto pins) {
- ceph_assert(pins.size() == 2);
- ceph_assert(pin_key == pins[0].get_key());
- ceph_assert(pin_key + pins[0].get_length() +
- region.new_len == pins[1].get_key());
- return ObjectDataHandler::write_iertr::now();
- });
- } else {
- ceph_abort_msg("impossible");
- return ObjectDataHandler::write_iertr::now();
- }
- });
-}
-
-ObjectDataHandler::write_ret do_removals(
- context_t ctx,
- lba_mapping_list_t &to_remove)
-{
- return trans_intr::do_for_each(
- to_remove,
- [ctx](auto &pin) {
- LOG_PREFIX(object_data_handler.cc::do_removals);
- DEBUGT("decreasing ref: {}",
- ctx.t,
- pin.get_key());
- return ctx.tm.remove(
- ctx.t,
- pin.get_key()
- ).discard_result().handle_error_interruptible(
- ObjectDataHandler::write_iertr::pass_further{},
- crimson::ct_error::assert_all{
- "object_data_handler::do_removals invalid error"
- }
- );
- });
-}
-
-/// Creates zero/data extents in to_insert
-ObjectDataHandler::write_ret do_insertions(
- context_t ctx,
- extent_to_insert_list_t &to_insert)
-{
- return trans_intr::do_for_each(
- to_insert,
- [ctx](auto ®ion) {
- LOG_PREFIX(object_data_handler.cc::do_insertions);
- if (region.is_data()) {
- assert_aligned(region.len);
- ceph_assert(region.len == region.bl->length());
- DEBUGT("allocating extent: {}~0x{:x}",
- ctx.t,
- region.addr,
- region.len);
- return ctx.tm.alloc_data_extents<ObjectDataBlock>(
- ctx.t,
- region.addr,
- region.len
- ).si_then([®ion](auto extents) {
- auto off = region.addr;
- auto left = region.len;
- auto iter = region.bl->cbegin();
- for (auto &extent : extents) {
- ceph_assert(left >= extent->get_length());
- if (extent->get_laddr() != off) {
- logger().debug(
- "object_data_handler::do_insertions alloc got addr {},"
- " should have been {}",
- extent->get_laddr(),
- off);
- }
- iter.copy(extent->get_length(), extent->get_bptr().c_str());
- off = (off + extent->get_length()).checked_to_laddr();
- left -= extent->get_length();
- }
- return ObjectDataHandler::write_iertr::now();
- }).handle_error_interruptible(
- crimson::ct_error::enospc::assert_failure{"unexpected enospc"},
- ObjectDataHandler::write_iertr::pass_further{}
- );
- } else if (region.is_zero()) {
- DEBUGT("reserving: {}~0x{:x}",
- ctx.t,
- region.addr,
- region.len);
- return ctx.tm.reserve_region(
- ctx.t,
- region.addr,
- region.len
- ).si_then([FNAME, ctx, ®ion](auto pin) {
- ceph_assert(pin.get_length() == region.len);
- if (pin.get_key() != region.addr) {
- ERRORT(
- "inconsistent laddr: pin: {} region {}",
- ctx.t,
- pin.get_key(),
- region.addr);
- }
- ceph_assert(pin.get_key() == region.addr);
- return ObjectDataHandler::write_iertr::now();
- }).handle_error_interruptible(
- crimson::ct_error::enospc::assert_failure{"unexpected enospc"},
- ObjectDataHandler::write_iertr::pass_further{}
- );
- } else {
- ceph_abort_msg("impossible");
- return ObjectDataHandler::write_iertr::now();
- }
- });
-}
-
-enum class overwrite_operation_t {
- UNKNOWN,
- OVERWRITE_ZERO, // fill unaligned data with zero
- MERGE_EXISTING, // if present, merge data with the clean/pending extent
- SPLIT_EXISTING, // split the existing extent, and fill unaligned data
-};
-
-std::ostream& operator<<(
- std::ostream &out,
- const overwrite_operation_t &operation)
-{
- switch (operation) {
- case overwrite_operation_t::UNKNOWN:
- return out << "UNKNOWN";
- case overwrite_operation_t::OVERWRITE_ZERO:
- return out << "OVERWRITE_ZERO";
- case overwrite_operation_t::MERGE_EXISTING:
- return out << "MERGE_EXISTING";
- case overwrite_operation_t::SPLIT_EXISTING:
- return out << "SPLIT_EXISTING";
- default:
- return out << "!IMPOSSIBLE_OPERATION";
- }
-}
-
-/**
- * overwrite_plan_t
- *
- * |<--------------------------pins_size---------------------------------------------->|
- * pin_begin(aligned) pin_end(aligned)
- * |<------aligned_data_size-------------------------->| (aligned-bl)
- * aligned_data_begin aligned_data_end
- * |<-data_size->| (bl)
- * data_begin end
- * left(l) right(r)
- * |<l_extent_size>|<l_alignment_size>| |<r_alignment_size>|<r_extent_size>|
- * |<-----------left_size------------>| |<-----------right_size----------->|
- *
- * |<-----(existing left extent/pin)----->| |<-----(existing right extent/pin)----->|
- * left_paddr right_paddr
- */
-struct overwrite_plan_t {
- // reserved data base of object data
- laddr_t data_base;
-
- // addresses about extents
- laddr_t pin_begin;
- laddr_t pin_end;
- paddr_t left_paddr;
- paddr_t right_paddr;
- laddr_offset_t data_begin;
- laddr_offset_t data_end;
- laddr_t aligned_data_begin;
- laddr_t aligned_data_end;
-
- // operations
- overwrite_operation_t left_operation;
- overwrite_operation_t right_operation;
-
- // helper member
- extent_len_t block_size;
- bool is_left_fresh;
- bool is_right_fresh;
-
-public:
- extent_len_t get_left_size() const {
- return data_begin.get_byte_distance<extent_len_t>(pin_begin);
- }
-
- extent_len_t get_left_extent_size() const {
- return aligned_data_begin.get_byte_distance<extent_len_t>(pin_begin);
- }
-
- extent_len_t get_left_alignment_size() const {
- return data_begin.get_byte_distance<extent_len_t>(aligned_data_begin);
- }
-
- extent_len_t get_right_size() const {
- return pin_end.get_byte_distance<extent_len_t>(data_end);
- }
-
- extent_len_t get_right_extent_size() const {
- return pin_end.get_byte_distance<extent_len_t>(aligned_data_end);
- }
-
- extent_len_t get_right_alignment_size() const {
- return aligned_data_end.get_byte_distance<extent_len_t>(data_end);
- }
-
- extent_len_t get_aligned_data_size() const {
- return aligned_data_end.get_byte_distance<extent_len_t>(aligned_data_begin);
- }
-
- extent_len_t get_pins_size() const {
- return pin_end.get_byte_distance<extent_len_t>(pin_begin);
- }
-
- friend std::ostream& operator<<(
- std::ostream& out,
- const overwrite_plan_t& overwrite_plan) {
- return out << "overwrite_plan_t("
- << "data_base=" << overwrite_plan.data_base
- << ", pin_begin=" << overwrite_plan.pin_begin
- << ", pin_end=" << overwrite_plan.pin_end
- << ", left_paddr=" << overwrite_plan.left_paddr
- << ", right_paddr=" << overwrite_plan.right_paddr
- << ", data_begin=" << overwrite_plan.data_begin
- << ", data_end=" << overwrite_plan.data_end
- << ", aligned_data_begin=" << overwrite_plan.aligned_data_begin
- << ", aligned_data_end=" << overwrite_plan.aligned_data_end
- << ", left_operation=" << overwrite_plan.left_operation
- << ", right_operation=" << overwrite_plan.right_operation
- << ", block_size=0x" << std::hex << overwrite_plan.block_size << std::dec
- << ", is_left_fresh=" << overwrite_plan.is_left_fresh
- << ", is_right_fresh=" << overwrite_plan.is_right_fresh
- << ")";
- }
-
- overwrite_plan_t(laddr_t data_base,
- objaddr_t offset,
- extent_len_t len,
- const lba_mapping_list_t& pins,
- extent_len_t block_size) :
- data_base(data_base),
- pin_begin(pins.front().get_key()),
- pin_end((pins.back().get_key() + pins.back().get_length()).checked_to_laddr()),
- left_paddr(pins.front().get_val()),
- right_paddr(pins.back().get_val()),
- data_begin(data_base + offset),
- data_end(data_base + offset + len),
- aligned_data_begin(data_begin.get_aligned_laddr()),
- aligned_data_end(data_end.get_roundup_laddr()),
- left_operation(overwrite_operation_t::UNKNOWN),
- right_operation(overwrite_operation_t::UNKNOWN),
- block_size(block_size),
- // TODO: introduce LBAMapping::is_fresh()
- // Note: fresh write can be merged with overwrite if they overlap.
- is_left_fresh(!pins.front().is_stable()),
- is_right_fresh(!pins.back().is_stable()) {
- validate();
- evaluate_operations();
- assert(left_operation != overwrite_operation_t::UNKNOWN);
- assert(right_operation != overwrite_operation_t::UNKNOWN);
- }
-
-private:
- // refer to overwrite_plan_t description
- void validate() const {
- ceph_assert(pin_begin <= aligned_data_begin);
- ceph_assert(aligned_data_begin <= data_begin);
- ceph_assert(data_begin <= data_end);
- ceph_assert(data_end <= aligned_data_end);
- ceph_assert(aligned_data_end <= pin_end);
- }
-
- /*
- * When trying to modify a portion of an object data block, follow
- * the read-full-extent-then-merge-new-data strategy, if the write
- * amplification caused by it is not greater than
- * seastore_obj_data_write_amplification; otherwise, split the
- * original extent into at most three parts: origin-left, part-to-be-modified
- * and origin-right.
- *
- * TODO: seastore_obj_data_write_amplification needs to be reconsidered because
- * delta-based overwrite is introduced
- */
- void evaluate_operations() {
- auto actual_write_size = get_pins_size();
- auto aligned_data_size = get_aligned_data_size();
- auto left_ext_size = get_left_extent_size();
- auto right_ext_size = get_right_extent_size();
-
- if (left_paddr.is_zero()) {
- actual_write_size -= left_ext_size;
- left_ext_size = 0;
- left_operation = overwrite_operation_t::OVERWRITE_ZERO;
- } else if (is_left_fresh) {
- aligned_data_size += left_ext_size;
- left_ext_size = 0;
- left_operation = overwrite_operation_t::MERGE_EXISTING;
- }
-
- if (right_paddr.is_zero()) {
- actual_write_size -= right_ext_size;
- right_ext_size = 0;
- right_operation = overwrite_operation_t::OVERWRITE_ZERO;
- } else if (is_right_fresh) {
- aligned_data_size += right_ext_size;
- right_ext_size = 0;
- right_operation = overwrite_operation_t::MERGE_EXISTING;
- }
-
- while (left_operation == overwrite_operation_t::UNKNOWN ||
- right_operation == overwrite_operation_t::UNKNOWN) {
- if (((double)actual_write_size / (double)aligned_data_size) <=
- crimson::common::get_conf<double>("seastore_obj_data_write_amplification")) {
- break;
- }
- if (left_ext_size == 0 && right_ext_size == 0) {
- break;
- }
- if (left_ext_size >= right_ext_size) {
- // split left
- assert(left_operation == overwrite_operation_t::UNKNOWN);
- actual_write_size -= left_ext_size;
- left_ext_size = 0;
- left_operation = overwrite_operation_t::SPLIT_EXISTING;
- } else { // left_ext_size < right_ext_size
- // split right
- assert(right_operation == overwrite_operation_t::UNKNOWN);
- actual_write_size -= right_ext_size;
- right_ext_size = 0;
- right_operation = overwrite_operation_t::SPLIT_EXISTING;
- }
- }
-
- if (left_operation == overwrite_operation_t::UNKNOWN) {
- // no split left, so merge with left
- left_operation = overwrite_operation_t::MERGE_EXISTING;
- }
-
- if (right_operation == overwrite_operation_t::UNKNOWN) {
- // no split right, so merge with right
- right_operation = overwrite_operation_t::MERGE_EXISTING;
- }
- }
-};
-
} // namespace crimson::os::seastore
-#if FMT_VERSION >= 90000
-template<> struct fmt::formatter<crimson::os::seastore::overwrite_plan_t> : fmt::ostream_formatter {};
-#endif
-
namespace crimson::os::seastore {
-/**
- * operate_left
- *
- * Proceed overwrite_plan.left_operation.
- */
-using operate_ret_bare = std::pair<
- std::optional<extent_to_write_t>,
- std::optional<ceph::bufferlist>>;
-using operate_ret = get_iertr::future<operate_ret_bare>;
-operate_ret operate_left(context_t ctx, LBAMapping &pin, const overwrite_plan_t &overwrite_plan)
-{
- if (overwrite_plan.get_left_size() == 0) {
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::nullopt,
- std::nullopt);
- }
-
- if (overwrite_plan.left_operation == overwrite_operation_t::OVERWRITE_ZERO) {
- assert(pin.get_val().is_zero());
-
- auto zero_extent_len = overwrite_plan.get_left_extent_size();
- assert_aligned(zero_extent_len);
- std::optional<extent_to_write_t> extent_to_write;
- if (zero_extent_len != 0) {
- extent_to_write = extent_to_write_t::create_zero(
- overwrite_plan.pin_begin, zero_extent_len);
- }
-
- auto zero_prepend_len = overwrite_plan.get_left_alignment_size();
- std::optional<ceph::bufferlist> prepend_bl;
- if (zero_prepend_len != 0) {
- ceph::bufferlist zero_bl;
- zero_bl.append_zero(zero_prepend_len);
- prepend_bl = std::move(zero_bl);
- }
-
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::move(extent_to_write),
- std::move(prepend_bl));
- } else if (overwrite_plan.left_operation == overwrite_operation_t::MERGE_EXISTING) {
- auto prepend_len = overwrite_plan.get_left_size();
- if (prepend_len == 0) {
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::nullopt,
- std::nullopt);
- } else {
- return ctx.tm.read_pin<ObjectDataBlock>(
- ctx.t, pin.duplicate()
- ).si_then([prepend_len](auto maybe_indirect_left_extent) {
- auto read_bl = maybe_indirect_left_extent.get_bl();
- ceph::bufferlist prepend_bl;
- prepend_bl.substr_of(read_bl, 0, prepend_len);
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::nullopt,
- std::move(prepend_bl));
- });
- }
- } else {
- assert(overwrite_plan.left_operation == overwrite_operation_t::SPLIT_EXISTING);
-
- auto extent_len = overwrite_plan.get_left_extent_size();
- assert(extent_len);
- std::optional<extent_to_write_t> left_to_write_extent =
- std::make_optional(extent_to_write_t::create_existing(
- pin.duplicate(),
- pin.get_key(),
- extent_len));
-
- auto prepend_len = overwrite_plan.get_left_alignment_size();
- if (prepend_len == 0) {
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::move(left_to_write_extent),
- std::nullopt);
- } else {
- return ctx.tm.read_pin<ObjectDataBlock>(
- ctx.t, pin.duplicate()
- ).si_then([prepend_offset=extent_len, prepend_len,
- left_to_write_extent=std::move(left_to_write_extent)]
- (auto left_maybe_indirect_extent) mutable {
- auto read_bl = left_maybe_indirect_extent.get_bl();
- ceph::bufferlist prepend_bl;
- prepend_bl.substr_of(read_bl, prepend_offset, prepend_len);
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::move(left_to_write_extent),
- std::move(prepend_bl));
- });
- }
- }
-};
-
-/**
- * operate_right
- *
- * Proceed overwrite_plan.right_operation.
- */
-operate_ret operate_right(context_t ctx, LBAMapping &pin, const overwrite_plan_t &overwrite_plan)
-{
- if (overwrite_plan.get_right_size() == 0) {
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::nullopt,
- std::nullopt);
- }
-
- auto right_pin_begin = pin.get_key();
- assert(overwrite_plan.data_end >= right_pin_begin);
- if (overwrite_plan.right_operation == overwrite_operation_t::OVERWRITE_ZERO) {
- assert(pin.get_val().is_zero());
-
- auto zero_suffix_len = overwrite_plan.get_right_alignment_size();
- std::optional<ceph::bufferlist> suffix_bl;
- if (zero_suffix_len != 0) {
- ceph::bufferlist zero_bl;
- zero_bl.append_zero(zero_suffix_len);
- suffix_bl = std::move(zero_bl);
- }
-
- auto zero_extent_len = overwrite_plan.get_right_extent_size();
- assert_aligned(zero_extent_len);
- std::optional<extent_to_write_t> extent_to_write;
- if (zero_extent_len != 0) {
- extent_to_write = extent_to_write_t::create_zero(
- overwrite_plan.aligned_data_end, zero_extent_len);
- }
-
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::move(extent_to_write),
- std::move(suffix_bl));
- } else if (overwrite_plan.right_operation == overwrite_operation_t::MERGE_EXISTING) {
- auto append_len = overwrite_plan.get_right_size();
- if (append_len == 0) {
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::nullopt,
- std::nullopt);
- } else {
- auto append_offset =
- overwrite_plan.data_end.get_byte_distance<
- extent_len_t>(right_pin_begin);
- return ctx.tm.read_pin<ObjectDataBlock>(
- ctx.t, pin.duplicate()
- ).si_then([append_offset, append_len]
- (auto right_maybe_indirect_extent) {
- auto read_bl = right_maybe_indirect_extent.get_bl();
- ceph::bufferlist suffix_bl;
- suffix_bl.substr_of(read_bl, append_offset, append_len);
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::nullopt,
- std::move(suffix_bl));
- });
- }
- } else {
- assert(overwrite_plan.right_operation == overwrite_operation_t::SPLIT_EXISTING);
-
- auto extent_len = overwrite_plan.get_right_extent_size();
- assert(extent_len);
- std::optional<extent_to_write_t> right_to_write_extent =
- std::make_optional(extent_to_write_t::create_existing(
- pin.duplicate(),
- overwrite_plan.aligned_data_end,
- extent_len));
-
- auto append_len = overwrite_plan.get_right_alignment_size();
- if (append_len == 0) {
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::move(right_to_write_extent),
- std::nullopt);
- } else {
- auto append_offset =
- overwrite_plan.data_end.get_byte_distance<
- extent_len_t>(right_pin_begin);
- return ctx.tm.read_pin<ObjectDataBlock>(
- ctx.t, pin.duplicate()
- ).si_then([append_offset, append_len,
- right_to_write_extent=std::move(right_to_write_extent)]
- (auto maybe_indirect_right_extent) mutable {
- auto read_bl = maybe_indirect_right_extent.get_bl();
- ceph::bufferlist suffix_bl;
- suffix_bl.substr_of(read_bl, append_offset, append_len);
- return get_iertr::make_ready_future<operate_ret_bare>(
- std::move(right_to_write_extent),
- std::move(suffix_bl));
- });
- }
- }
-};
-
template <typename F>
auto with_object_data(
ObjectDataHandler::context_t ctx,