From 7998a03a2febcc2d969336f336186aabbd1a5a7c Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Mon, 18 Nov 2024 16:06:40 +0800 Subject: [PATCH] crimson/os/seastore/transaction_manager: implement and use maybe_indirect_extent_t::get_bl() Return bufferlist because the extent may be partially loaded under indirection. Signed-off-by: Yingxin Cheng --- .../os/seastore/object_data_handler.cc | 191 ++++++++++-------- src/crimson/os/seastore/transaction_manager.h | 30 +-- 2 files changed, 120 insertions(+), 101 deletions(-) diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc index 70e6fe58e3a..36413f1206a 100644 --- a/src/crimson/os/seastore/object_data_handler.cc +++ b/src/crimson/os/seastore/object_data_handler.cc @@ -50,6 +50,8 @@ struct extent_to_write_t { extent_to_write_t(const extent_to_write_t &) = delete; extent_to_write_t(extent_to_write_t &&) = default; + extent_to_write_t& operator=(const extent_to_write_t&) = delete; + extent_to_write_t& operator=(extent_to_write_t&&) = default; bool is_data() const { return type == type_t::DATA; @@ -827,7 +829,7 @@ namespace crimson::os::seastore { */ using operate_ret_bare = std::pair< std::optional, - std::optional>; + std::optional>; using operate_ret = get_iertr::future; operate_ret operate_left(context_t ctx, LBAMappingRef &pin, const overwrite_plan_t &overwrite_plan) { @@ -839,19 +841,26 @@ operate_ret operate_left(context_t ctx, LBAMappingRef &pin, const overwrite_plan if (overwrite_plan.left_operation == overwrite_operation_t::OVERWRITE_ZERO) { assert(pin->get_val().is_zero()); + auto zero_extent_len = overwrite_plan.get_left_extent_size(); assert_aligned(zero_extent_len); + std::optional extent_to_write; + if (zero_extent_len != 0) { + extent_to_write = extent_to_write_t::create_zero( + overwrite_plan.pin_begin, zero_extent_len); + } + auto zero_prepend_len = overwrite_plan.get_left_alignment_size(); + std::optional prepend_bl; + if (zero_prepend_len != 0) { + ceph::bufferlist zero_bl; + zero_bl.append_zero(zero_prepend_len); + prepend_bl = std::move(zero_bl); + } + return get_iertr::make_ready_future( - (zero_extent_len == 0 - ? std::nullopt - : std::make_optional(extent_to_write_t::create_zero( - overwrite_plan.pin_begin, zero_extent_len))), - (zero_prepend_len == 0 - ? std::nullopt - : std::make_optional(bufferptr( - ceph::buffer::create(zero_prepend_len, 0)))) - ); + std::move(extent_to_write), + std::move(prepend_bl)); } else if (overwrite_plan.left_operation == overwrite_operation_t::MERGE_EXISTING) { auto prepend_len = overwrite_plan.get_left_size(); if (prepend_len == 0) { @@ -862,12 +871,12 @@ operate_ret operate_left(context_t ctx, LBAMappingRef &pin, const overwrite_plan return ctx.tm.read_pin( ctx.t, pin->duplicate() ).si_then([prepend_len](auto maybe_indirect_left_extent) { + auto read_bl = maybe_indirect_left_extent.get_bl(); + ceph::bufferlist prepend_bl; + prepend_bl.substr_of(read_bl, 0, prepend_len); return get_iertr::make_ready_future( std::nullopt, - std::make_optional(bufferptr( - maybe_indirect_left_extent.get_bptr(), - 0, - prepend_len))); + std::move(prepend_bl)); }); } } else { @@ -892,12 +901,12 @@ operate_ret operate_left(context_t ctx, LBAMappingRef &pin, const overwrite_plan ).si_then([prepend_offset=extent_len, prepend_len, left_to_write_extent=std::move(left_to_write_extent)] (auto left_maybe_indirect_extent) mutable { + auto read_bl = left_maybe_indirect_extent.get_bl(); + ceph::bufferlist prepend_bl; + prepend_bl.substr_of(read_bl, prepend_offset, prepend_len); return get_iertr::make_ready_future( std::move(left_to_write_extent), - std::make_optional(bufferptr( - left_maybe_indirect_extent.get_bptr(), - prepend_offset, - prepend_len))); + std::move(prepend_bl)); }); } } @@ -920,19 +929,26 @@ operate_ret operate_right(context_t ctx, LBAMappingRef &pin, const overwrite_pla assert(overwrite_plan.data_end >= right_pin_begin); if (overwrite_plan.right_operation == overwrite_operation_t::OVERWRITE_ZERO) { assert(pin->get_val().is_zero()); + auto zero_suffix_len = overwrite_plan.get_right_alignment_size(); + std::optional suffix_bl; + if (zero_suffix_len != 0) { + ceph::bufferlist zero_bl; + zero_bl.append_zero(zero_suffix_len); + suffix_bl = std::move(zero_bl); + } + auto zero_extent_len = overwrite_plan.get_right_extent_size(); assert_aligned(zero_extent_len); + std::optional extent_to_write; + if (zero_extent_len != 0) { + extent_to_write = extent_to_write_t::create_zero( + overwrite_plan.aligned_data_end, zero_extent_len); + } + return get_iertr::make_ready_future( - (zero_extent_len == 0 - ? std::nullopt - : std::make_optional(extent_to_write_t::create_zero( - overwrite_plan.aligned_data_end, zero_extent_len))), - (zero_suffix_len == 0 - ? std::nullopt - : std::make_optional(bufferptr( - ceph::buffer::create(zero_suffix_len, 0)))) - ); + std::move(extent_to_write), + std::move(suffix_bl)); } else if (overwrite_plan.right_operation == overwrite_operation_t::MERGE_EXISTING) { auto append_len = overwrite_plan.get_right_size(); if (append_len == 0) { @@ -947,12 +963,12 @@ operate_ret operate_right(context_t ctx, LBAMappingRef &pin, const overwrite_pla ctx.t, pin->duplicate() ).si_then([append_offset, append_len] (auto right_maybe_indirect_extent) { + auto read_bl = right_maybe_indirect_extent.get_bl(); + ceph::bufferlist suffix_bl; + suffix_bl.substr_of(read_bl, append_offset, append_len); return get_iertr::make_ready_future( std::nullopt, - std::make_optional(bufferptr( - right_maybe_indirect_extent.get_bptr(), - append_offset, - append_len))); + std::move(suffix_bl)); }); } } else { @@ -980,12 +996,12 @@ operate_ret operate_right(context_t ctx, LBAMappingRef &pin, const overwrite_pla ).si_then([append_offset, append_len, right_to_write_extent=std::move(right_to_write_extent)] (auto maybe_indirect_right_extent) mutable { + auto read_bl = maybe_indirect_right_extent.get_bl(); + ceph::bufferlist suffix_bl; + suffix_bl.substr_of(read_bl, append_offset, append_len); return get_iertr::make_ready_future( std::move(right_to_write_extent), - std::make_optional(bufferptr( - maybe_indirect_right_extent.get_bptr(), - append_offset, - append_len))); + std::move(suffix_bl)); }); } } @@ -1134,21 +1150,17 @@ ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation( pin.duplicate() ).si_then([ctx, size, pin_offset, append_len, roundup_size, &pin, &object_data, &to_write](auto maybe_indirect_extent) { - bufferlist bl; - bl.append( - bufferptr( - maybe_indirect_extent.get_bptr(), - 0, - size - pin_offset - )); - bl.append_zero(append_len); + auto read_bl = maybe_indirect_extent.get_bl(); + ceph::bufferlist write_bl; + write_bl.substr_of(read_bl, 0, size - pin_offset); + write_bl.append_zero(append_len); LOG_PREFIX(ObjectDataHandler::trim_data_reservation); TRACET("First pin overlaps the boundary and has unaligned data" "create data at addr:{}, len:{}", - ctx.t, pin.get_key(), bl.length()); + ctx.t, pin.get_key(), write_bl.length()); to_write.push_back(extent_to_write_t::create_data( pin.get_key(), - bl)); + write_bl)); to_write.push_back(extent_to_write_t::create_zero( (object_data.get_reserved_data_base() + roundup_size).checked_to_laddr(), object_data.get_reserved_data_len() - roundup_size)); @@ -1181,44 +1193,45 @@ ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation( * get_to_writes_with_zero_buffer * * Returns extent_to_write_t's reflecting a zero region extending - * from offset~len with headptr optionally on the left and tailptr + * from offset~len with headbl optionally on the left and tailbl * optionally on the right. */ extent_to_write_list_t get_to_writes_with_zero_buffer( laddr_t data_base, const extent_len_t block_size, objaddr_t offset, extent_len_t len, - std::optional &&headptr, std::optional &&tailptr) + std::optional &&headbl, + std::optional &&tailbl) { auto zero_left = p2roundup(offset, (objaddr_t)block_size); auto zero_right = p2align(offset + len, (objaddr_t)block_size); - auto left = headptr ? (offset - headptr->length()) : offset; - auto right = tailptr ? - (offset + len + tailptr->length()) : + auto left = headbl ? (offset - headbl->length()) : offset; + auto right = tailbl ? + (offset + len + tailbl->length()) : (offset + len); assert( - (headptr && ((zero_left - left) == - p2roundup(headptr->length(), block_size))) ^ - (!headptr && (zero_left == left))); + (headbl && ((zero_left - left) == + p2roundup(headbl->length(), block_size))) ^ + (!headbl && (zero_left == left))); assert( - (tailptr && ((right - zero_right) == - p2roundup(tailptr->length(), block_size))) ^ - (!tailptr && (right == zero_right))); + (tailbl && ((right - zero_right) == + p2roundup(tailbl->length(), block_size))) ^ + (!tailbl && (right == zero_right))); assert(right > left); // zero region too small for a reserved section, - // headptr and tailptr in same extent + // headbl and tailbl in same extent if (zero_right <= zero_left) { bufferlist bl; - if (headptr) { - bl.append(*headptr); + if (headbl) { + bl.append(*headbl); } bl.append_zero( - right - left - bl.length() - (tailptr ? tailptr->length() : 0)); - if (tailptr) { - bl.append(*tailptr); + right - left - bl.length() - (tailbl ? tailbl->length() : 0)); + if (tailbl) { + bl.append(*tailbl); } assert(bl.length() % block_size == 0); assert(bl.length() == (right - left)); @@ -1227,16 +1240,16 @@ extent_to_write_list_t get_to_writes_with_zero_buffer( (data_base + left).checked_to_laddr(), bl)); return ret; } else { - // reserved section between ends, headptr and tailptr in different extents + // reserved section between ends, headbl and tailbl in different extents extent_to_write_list_t ret; - if (headptr) { - bufferlist headbl; - headbl.append(*headptr); - headbl.append_zero(zero_left - left - headbl.length()); - assert(headbl.length() % block_size == 0); - assert(headbl.length() > 0); + if (headbl) { + bufferlist head_zero_bl; + head_zero_bl.append(*headbl); + head_zero_bl.append_zero(zero_left - left - head_zero_bl.length()); + assert(head_zero_bl.length() % block_size == 0); + assert(head_zero_bl.length() > 0); ret.push_back(extent_to_write_t::create_data( - (data_base + left).checked_to_laddr(), headbl)); + (data_base + left).checked_to_laddr(), head_zero_bl)); } // reserved zero region ret.push_back(extent_to_write_t::create_zero( @@ -1244,14 +1257,14 @@ extent_to_write_list_t get_to_writes_with_zero_buffer( zero_right - zero_left)); assert(ret.back().len % block_size == 0); assert(ret.back().len > 0); - if (tailptr) { - bufferlist tailbl; - tailbl.append(*tailptr); - tailbl.append_zero(right - zero_right - tailbl.length()); - assert(tailbl.length() % block_size == 0); - assert(tailbl.length() > 0); + if (tailbl) { + bufferlist tail_zero_bl; + tail_zero_bl.append(*tailbl); + tail_zero_bl.append_zero(right - zero_right - tail_zero_bl.length()); + assert(tail_zero_bl.length() % block_size == 0); + assert(tail_zero_bl.length() > 0); ret.push_back(extent_to_write_t::create_data( - (data_base + zero_right).checked_to_laddr(), tailbl)); + (data_base + zero_right).checked_to_laddr(), tail_zero_bl)); } return ret; } @@ -1303,13 +1316,13 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite( overwrite_plan ).si_then([ctx, data_base, len, offset, overwrite_plan, bl=std::move(bl), &to_write, &pins, this](auto p) mutable { - auto &[left_extent, headptr] = p; + auto &[left_extent, headbl] = p; if (left_extent) { ceph_assert(left_extent->addr == overwrite_plan.pin_begin); append_extent_to_write(to_write, std::move(*left_extent)); } - if (headptr) { - assert(headptr->length() > 0); + if (headbl) { + assert(headbl->length() > 0); } return operate_right( ctx, @@ -1318,19 +1331,19 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite( ).si_then([ctx, data_base, len, offset, pin_begin=overwrite_plan.pin_begin, pin_end=overwrite_plan.pin_end, - bl=std::move(bl), headptr=std::move(headptr), + bl=std::move(bl), headbl=std::move(headbl), &to_write, &pins, this](auto p) mutable { - auto &[right_extent, tailptr] = p; + auto &[right_extent, tailbl] = p; if (bl.has_value()) { auto write_offset = offset; bufferlist write_bl; - if (headptr) { - write_bl.append(*headptr); - write_offset = write_offset - headptr->length(); + if (headbl) { + write_bl.append(*headbl); + write_offset = write_offset - headbl->length(); } write_bl.claim_append(*bl); - if (tailptr) { - write_bl.append(*tailptr); + if (tailbl) { + write_bl.append(*tailbl); assert_aligned(write_bl.length()); } splice_extent_to_write( @@ -1344,8 +1357,8 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite( ctx.tm.get_block_size(), offset, len, - std::move(headptr), - std::move(tailptr))); + std::move(headbl), + std::move(tailbl))); } if (right_extent) { ceph_assert(right_extent->get_end_addr() == pin_end); diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 8f2870a6a27..c9ce635ee8c 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -156,30 +156,36 @@ public: return maybe_indirect_info.has_value(); } - ceph::bufferptr get_bptr() const { + ceph::bufferlist get_bl() const { if (is_indirect()) { - return ceph::bufferptr( - extent->get_bptr(), - maybe_indirect_info->intermediate_offset, - maybe_indirect_info->length); + return do_get_indirect_range(0, maybe_indirect_info->length); } else { - return extent->get_bptr(); + assert(extent->is_fully_loaded()); + bufferlist bl; + bl.append(extent->get_bptr()); + return bl; } } ceph::bufferlist get_range( extent_len_t offset, extent_len_t length) const { if (is_indirect()) { - assert(maybe_indirect_info->intermediate_offset + offset + length <= - extent->get_length()); - assert(offset + length <= maybe_indirect_info->length); - return extent->get_range( - maybe_indirect_info->intermediate_offset + offset, - length); + return do_get_indirect_range(offset, length); } else { return extent->get_range(offset, length); } } + private: + ceph::bufferlist do_get_indirect_range( + extent_len_t offset, extent_len_t length) const { + assert(is_indirect()); + assert(maybe_indirect_info->intermediate_offset + offset + length <= + extent->get_length()); + assert(offset + length <= maybe_indirect_info->length); + return extent->get_range( + maybe_indirect_info->intermediate_offset + offset, + length); + } }; /** -- 2.39.5