/**
* extent_to_write_t
*
- * Encapsulates extents to be written out using do_insertions.
+ * Encapsulates smallest write operations in overwrite.
* Indicates a zero/existing extent or a data extent based on whether
* to_write is populate.
- * The meaning of existing_paddr is that the new extent to be
- * written is the part of exising extent on the disk. existing_paddr
- * must be absolute.
+ * Should be handled by prepare_ops_list.
*/
struct extent_to_write_t {
enum class type_t {
ZERO,
EXISTING,
};
-
type_t type;
+
+ /// pin of original extent, not nullptr if type == EXISTING
+ LBAMappingRef pin;
+
laddr_t addr;
extent_len_t len;
+
/// non-nullopt if and only if type == DATA
std::optional<bufferlist> to_write;
- /// non-nullopt if and only if type == EXISTING
- std::optional<paddr_t> existing_paddr;
- extent_to_write_t(const extent_to_write_t &) = default;
+ extent_to_write_t(const extent_to_write_t &) = delete;
extent_to_write_t(extent_to_write_t &&) = default;
bool is_data() const {
}
static extent_to_write_t create_zero(
- laddr_t addr, extent_len_t len) {
+ laddr_t addr, extent_len_t len) {
return extent_to_write_t(addr, len);
}
static extent_to_write_t create_existing(
- laddr_t addr, paddr_t existing_paddr, extent_len_t len) {
- return extent_to_write_t(addr, existing_paddr, len);
+ LBAMappingRef &&pin, laddr_t addr, extent_len_t len) {
+ assert(pin);
+ return extent_to_write_t(std::move(pin), addr, len);
}
private:
extent_to_write_t(laddr_t addr, extent_len_t len)
: type(type_t::ZERO), addr(addr), len(len) {}
- extent_to_write_t(laddr_t addr, paddr_t existing_paddr, extent_len_t len)
- : type(type_t::EXISTING), addr(addr), len(len),
- to_write(std::nullopt), existing_paddr(existing_paddr) {}
+ extent_to_write_t(LBAMappingRef &&pin, laddr_t addr, extent_len_t len)
+ : type(type_t::EXISTING), pin(std::move(pin)), addr(addr), len(len) {}
};
using extent_to_write_list_t = std::list<extent_to_write_t>;
+// Encapsulates extents to be written out using do_remappings.
+struct extent_to_remap_t {
+ enum class type_t {
+ REMAP,
+ OVERWRITE
+ };
+ type_t type;
+ /// pin of original extent
+ LBAMappingRef pin;
+ /// offset of remapped extent or overwrite part of overwrite extent.
+ /// overwrite part of overwrite extent might correspond to mutiple
+ /// fresh write extent.
+ extent_len_t new_offset;
+ /// length of remapped extent or overwrite part of overwrite extent
+ extent_len_t new_len;
+
+ extent_to_remap_t(const extent_to_remap_t &) = delete;
+ extent_to_remap_t(extent_to_remap_t &&) = default;
+
+ bool is_remap() const {
+ return type == type_t::REMAP;
+ }
+
+ bool is_overwrite() const {
+ assert((new_offset != 0) && (pin->get_length() != new_offset + new_len));
+ return type == type_t::OVERWRITE;
+ }
+
+ using remap_entry = TransactionManager::remap_entry;
+ remap_entry create_remap_entry() {
+ assert(is_remap());
+ return remap_entry(
+ new_offset,
+ new_len);
+ }
+
+ remap_entry create_left_remap_entry() {
+ assert(is_overwrite());
+ return remap_entry(
+ 0,
+ new_offset);
+ }
+
+ remap_entry create_right_remap_entry() {
+ assert(is_overwrite());
+ return remap_entry(
+ new_offset + new_len,
+ pin->get_length() - new_offset - new_len);
+ }
+
+ static extent_to_remap_t create_remap(
+ LBAMappingRef &&pin, extent_len_t new_offset, extent_len_t new_len) {
+ return extent_to_remap_t(type_t::REMAP,
+ std::move(pin), new_offset, new_len);
+ }
+
+ static extent_to_remap_t create_overwrite(
+ LBAMappingRef &&pin, extent_len_t new_offset, extent_len_t new_len) {
+ return extent_to_remap_t(type_t::OVERWRITE,
+ std::move(pin), new_offset, new_len);
+ }
+
+private:
+ extent_to_remap_t(type_t type,
+ LBAMappingRef &&pin, extent_len_t new_offset, extent_len_t new_len)
+ : type(type),
+ pin(std::move(pin)), new_offset(new_offset), new_len(new_len) {}
+};
+using extent_to_remap_list_t = std::list<extent_to_remap_t>;
+
+// Encapsulates extents to be written out using do_insertions.
+struct extent_to_insert_t {
+ enum class type_t {
+ DATA,
+ ZERO
+ };
+ type_t type;
+ /// laddr of new extent
+ laddr_t addr;
+ /// length of new extent
+ extent_len_t len;
+ /// non-nullopt if type == DATA
+ std::optional<bufferlist> bl;
+
+ extent_to_insert_t(const extent_to_insert_t &) = default;
+ extent_to_insert_t(extent_to_insert_t &&) = default;
+
+ bool is_data() const {
+ return type == type_t::DATA;
+ }
+
+ bool is_zero() const {
+ return type == type_t::ZERO;
+ }
+
+ static extent_to_insert_t create_data(
+ laddr_t addr, extent_len_t len, std::optional<bufferlist> bl) {
+ return extent_to_insert_t(addr, len, bl);
+ }
+
+ static extent_to_insert_t create_zero(
+ laddr_t addr, extent_len_t len) {
+ return extent_to_insert_t(addr, len);
+ }
+
+private:
+ extent_to_insert_t(laddr_t addr, extent_len_t len,
+ std::optional<bufferlist> bl)
+ :type(type_t::DATA), addr(addr), len(len), bl(bl) {}
+
+ extent_to_insert_t(laddr_t addr, extent_len_t len)
+ :type(type_t::ZERO), addr(addr), len(len) {}
+};
+using extent_to_insert_list_t = std::list<extent_to_insert_t>;
+
+// Encapsulates extents to be retired in do_removals.
+using extent_to_remove_list_t = std::list<LBAMappingRef>;
+
+struct overwrite_ops_t {
+ extent_to_remap_list_t to_remap;
+ extent_to_insert_list_t to_insert;
+ extent_to_remove_list_t to_remove;
+};
+
+// prepare to_remap, to_retire, to_insert list
+overwrite_ops_t prepare_ops_list(
+ lba_pin_list_t &pins_to_remove,
+ extent_to_write_list_t &to_write) {
+ assert(to_write.size() != 0 && pins_to_remove.size() != 0);
+ overwrite_ops_t ops;
+ ops.to_remove.swap(pins_to_remove);
+ auto& front = to_write.front();
+ auto& back = to_write.back();
+ long unsigned int visitted = 0;
+
+ // prepare overwrite, happens in one original extent.
+ if (ops.to_remove.size() == 1 &&
+ front.is_existing() && back.is_existing()) {
+ visitted += 2;
+ assert(to_write.size() > 2);
+ assert(front.addr == front.pin->get_key());
+ assert(back.addr > back.pin->get_key());
+ ops.to_remap.push_back(extent_to_remap_t::create_overwrite(
+ std::move(front.pin),
+ front.len,
+ back.addr - front.addr - front.len));
+ ops.to_remove.pop_front();
+ } else {
+ // prepare to_remap, happens in one or multiple extents
+ if (front.is_existing()) {
+ visitted++;
+ assert(to_write.size() > 1);
+ assert(front.addr == front.pin->get_key());
+ ops.to_remap.push_back(extent_to_remap_t::create_remap(
+ std::move(front.pin),
+ 0,
+ front.len));
+ ops.to_remove.pop_front();
+ }
+ if (back.is_existing()) {
+ visitted++;
+ assert(to_write.size() > 1);
+ assert(back.addr + back.len ==
+ back.pin->get_key() + back.pin->get_length());
+ assert((*(to_write.begin())).addr == back.pin->get_key());
+ ops.to_remap.push_back(extent_to_remap_t::create_remap(
+ std::move(back.pin),
+ back.addr - back.pin->get_key(),
+ back.len));
+ ops.to_remove.pop_back();
+ }
+ }
+
+ // prepare to_insert
+ for (auto ®ion : to_write) {
+ if (region.is_data()) {
+ visitted++;
+ assert(region.to_write.has_value());
+ ops.to_insert.push_back(extent_to_insert_t::create_data(
+ region.addr, region.len, region.to_write));
+ } else if (region.is_zero()) {
+ visitted++;
+ assert(!(region.to_write.has_value()));
+ ops.to_insert.push_back(extent_to_insert_t::create_zero(
+ region.addr, region.len));
+ }
+ }
+
+ logger().debug(
+ "to_remap list size: {}"
+ " to_insert list size: {}"
+ " to_remove list size: {}",
+ ops.to_remap.size(), ops.to_insert.size(), ops.to_remove.size());
+ assert(visitted == to_write.size());
+ return ops;
+}
+
/**
* append_extent_to_write
*
}
}
-/// Removes extents/mappings in pins
+/// Creates remap extents in to_remap
+ObjectDataHandler::write_ret do_remappings(
+ context_t ctx,
+ extent_to_remap_list_t &to_remap)
+{
+ return trans_intr::do_for_each(
+ to_remap,
+ [ctx](auto ®ion) {
+ if (region.is_remap()) {
+ return ctx.tm.remap_pin<ObjectDataBlock, 1>(
+ ctx.t,
+ std::move(region.pin),
+ std::array{
+ region.create_remap_entry()
+ }
+ ).si_then([®ion](auto pins) {
+ ceph_assert(pins.size() == 1);
+ ceph_assert(region.new_len == pins[0]->get_length());
+ return ObjectDataHandler::write_iertr::now();
+ });
+ } else if (region.is_overwrite()) {
+ return ctx.tm.remap_pin<ObjectDataBlock, 2>(
+ ctx.t,
+ std::move(region.pin),
+ std::array{
+ region.create_left_remap_entry(),
+ region.create_right_remap_entry()
+ }
+ ).si_then([®ion](auto pins) {
+ ceph_assert(pins.size() == 2);
+ ceph_assert(region.pin->get_key() == pins[0]->get_key());
+ ceph_assert(region.pin->get_key() + pins[0]->get_length() +
+ region.new_len == pins[1]->get_key());
+ return ObjectDataHandler::write_iertr::now();
+ });
+ } else {
+ ceph_abort("impossible");
+ return ObjectDataHandler::write_iertr::now();
+ }
+ });
+}
+
ObjectDataHandler::write_ret do_removals(
context_t ctx,
- lba_pin_list_t &pins)
+ lba_pin_list_t &to_remove)
{
return trans_intr::do_for_each(
- pins,
+ to_remove,
[ctx](auto &pin) {
LOG_PREFIX(object_data_handler.cc::do_removals);
DEBUGT("decreasing ref: {}",
});
}
-/// Creates zero/data extents in to_write
+/// Creates zero/data extents in to_insert
ObjectDataHandler::write_ret do_insertions(
context_t ctx,
- extent_to_write_list_t &to_write)
+ extent_to_insert_list_t &to_insert)
{
return trans_intr::do_for_each(
- to_write,
+ to_insert,
[ctx](auto ®ion) {
LOG_PREFIX(object_data_handler.cc::do_insertions);
if (region.is_data()) {
assert_aligned(region.addr);
assert_aligned(region.len);
- ceph_assert(region.len == region.to_write->length());
+ ceph_assert(region.len == region.bl->length());
DEBUGT("allocating extent: {}~{}",
ctx.t,
region.addr,
}
ceph_assert(extent->get_laddr() == region.addr);
ceph_assert(extent->get_length() == region.len);
- auto iter = region.to_write->cbegin();
+ auto iter = region.bl->cbegin();
iter.copy(region.len, extent->get_bptr().c_str());
return ObjectDataHandler::write_iertr::now();
});
return ObjectDataHandler::write_iertr::now();
});
} else {
- ceph_assert(region.is_existing());
- DEBUGT("map existing extent: laddr {} len {} {}",
- ctx.t, region.addr, region.len, *region.existing_paddr);
- return ctx.tm.map_existing_extent<ObjectDataBlock>(
- ctx.t, region.addr, *region.existing_paddr, region.len
- ).handle_error_interruptible(
- TransactionManager::alloc_extent_iertr::pass_further{},
- Device::read_ertr::assert_all{"ignore read error"}
- ).si_then([FNAME, ctx, ®ion](auto extent) {
- if (extent->get_laddr() != region.addr) {
- ERRORT(
- "inconsistent laddr: extent: {} region {}",
- ctx.t,
- extent->get_laddr(),
- region.addr);
- }
- ceph_assert(extent->get_laddr() == region.addr);
- return ObjectDataHandler::write_iertr::now();
- });
+ ceph_abort("impossible");
+ return ObjectDataHandler::write_iertr::now();
}
});
}
assert(extent_len);
std::optional<extent_to_write_t> left_to_write_extent =
std::make_optional(extent_to_write_t::create_existing(
- overwrite_plan.pin_begin,
- overwrite_plan.left_paddr,
+ pin->duplicate(),
+ pin->get_key(),
extent_len));
auto prepend_len = overwrite_plan.get_left_alignment_size();
if (prepend_len == 0) {
return get_iertr::make_ready_future<operate_ret_bare>(
- left_to_write_extent,
+ std::move(left_to_write_extent),
std::nullopt);
} else {
return ctx.tm.read_pin<ObjectDataBlock>(
left_to_write_extent=std::move(left_to_write_extent)]
(auto left_extent) mutable {
return get_iertr::make_ready_future<operate_ret_bare>(
- left_to_write_extent,
+ std::move(left_to_write_extent),
std::make_optional(bufferptr(
left_extent->get_bptr(),
prepend_offset,
assert(extent_len);
std::optional<extent_to_write_t> right_to_write_extent =
std::make_optional(extent_to_write_t::create_existing(
+ pin->duplicate(),
overwrite_plan.aligned_data_end,
- overwrite_plan.right_paddr.add_offset(overwrite_plan.aligned_data_end - right_pin_begin),
extent_len));
auto append_len = overwrite_plan.get_right_alignment_size();
if (append_len == 0) {
return get_iertr::make_ready_future<operate_ret_bare>(
- right_to_write_extent,
+ std::move(right_to_write_extent),
std::nullopt);
} else {
auto append_offset = overwrite_plan.data_end - right_pin_begin;
right_to_write_extent=std::move(right_to_write_extent)]
(auto right_extent) mutable {
return get_iertr::make_ready_future<operate_ret_bare>(
- right_to_write_extent,
+ std::move(right_to_write_extent),
std::make_optional(bufferptr(
right_extent->get_bptr(),
append_offset,
object_data.get_reserved_data_len() - pin_offset));
return clear_iertr::now();
} else {
- /* First pin overlaps the boundary and has data, read in extent
- * and rewrite portion prior to size */
- return ctx.tm.read_pin<ObjectDataBlock>(
- ctx.t,
- pin.duplicate()
- ).si_then([ctx, size, pin_offset, &pin, &object_data, &to_write](
- auto extent) {
- bufferlist bl;
- bl.append(
- bufferptr(
- extent->get_bptr(),
- 0,
- size - pin_offset
- ));
- bl.append_zero(p2roundup(size, ctx.tm.get_block_size()) - size);
- to_write.push_back(extent_to_write_t::create_data(
- pin.get_key(),
- bl));
+ /* First pin overlaps the boundary and has data, remap it
+ * if aligned or rewrite it if not aligned to size */
+ auto roundup_size = p2roundup(size, ctx.tm.get_block_size());
+ auto append_len = roundup_size - size;
+ if (append_len == 0) {
+ LOG_PREFIX(ObjectDataHandler::trim_data_reservation);
+ TRACET("First pin overlaps the boundary and has aligned data"
+ "create existing at addr:{}, len:{}",
+ ctx.t, pin.get_key(), size - pin_offset);
+ to_write.push_back(extent_to_write_t::create_existing(
+ pin.duplicate(),
+ pin.get_key(),
+ size - pin_offset));
to_write.push_back(extent_to_write_t::create_zero(
- object_data.get_reserved_data_base() +
- p2roundup(size, ctx.tm.get_block_size()),
- object_data.get_reserved_data_len() -
- p2roundup(size, ctx.tm.get_block_size())));
- return clear_iertr::now();
- });
- }
- }).si_then([ctx, &pins] {
- return do_removals(ctx, pins);
- }).si_then([ctx, &to_write] {
- return do_insertions(ctx, to_write);
- }).si_then([size, &object_data] {
- if (size == 0) {
- object_data.clear();
+ object_data.get_reserved_data_base() + roundup_size,
+ object_data.get_reserved_data_len() - roundup_size));
+ return clear_iertr::now();
+ } else {
+ return ctx.tm.read_pin<ObjectDataBlock>(
+ ctx.t,
+ pin.duplicate()
+ ).si_then([ctx, size, pin_offset, append_len, roundup_size,
+ &pin, &object_data, &to_write](auto extent) {
+ bufferlist bl;
+ bl.append(
+ bufferptr(
+ extent->get_bptr(),
+ 0,
+ size - pin_offset
+ ));
+ bl.append_zero(append_len);
+ LOG_PREFIX(ObjectDataHandler::trim_data_reservation);
+ TRACET("First pin overlaps the boundary and has unaligned data"
+ "create data at addr:{}, len:{}",
+ ctx.t, pin.get_key(), bl.length());
+ to_write.push_back(extent_to_write_t::create_data(
+ pin.get_key(),
+ bl));
+ to_write.push_back(extent_to_write_t::create_zero(
+ object_data.get_reserved_data_base() + roundup_size,
+ object_data.get_reserved_data_len() - roundup_size));
+ return clear_iertr::now();
+ });
+ }
}
- return ObjectDataHandler::clear_iertr::now();
+ }).si_then([ctx, size, &to_write, &object_data, &pins] {
+ assert(to_write.size());
+ return seastar::do_with(
+ prepare_ops_list(pins, to_write),
+ [ctx, size, &object_data](auto &ops) {
+ return do_remappings(ctx, ops.to_remap
+ ).si_then([ctx, &ops] {
+ return do_removals(ctx, ops.to_remove);
+ }).si_then([ctx, &ops] {
+ return do_insertions(ctx, ops.to_insert);
+ }).si_then([size, &object_data] {
+ if (size == 0) {
+ object_data.clear();
+ }
+ return ObjectDataHandler::clear_iertr::now();
+ });
+ });
});
});
}
}
assert(bl.length() % block_size == 0);
assert(bl.length() == (right - left));
- return {extent_to_write_t::create_data(left, bl)};
+ extent_to_write_list_t ret;
+ ret.push_back(extent_to_write_t::create_data(left, bl));
+ return ret;
} else {
// reserved section between ends, headptr and tailptr in different extents
extent_to_write_list_t ret;
assert(pin_begin == to_write.front().addr);
assert(pin_end == to_write.back().get_end_addr());
- return do_removals(ctx, pins);
- }).si_then([ctx, &to_write] {
- return do_insertions(ctx, to_write);
+ return seastar::do_with(
+ prepare_ops_list(pins, to_write),
+ [ctx](auto &ops) {
+ return do_remappings(ctx, ops.to_remap
+ ).si_then([ctx, &ops] {
+ return do_removals(ctx, ops.to_remove);
+ }).si_then([ctx, &ops] {
+ return do_insertions(ctx, ops.to_insert);
+ });
+ });
});
});
});