From: Samuel Just Date: Sat, 10 Apr 2021 01:04:59 +0000 (-0700) Subject: crimson/os/seastore/object_data_handler: add logic for handling object data X-Git-Tag: v17.1.0~2198^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2b50b23cc158019cbcd044462ba0994d0268cea3;p=ceph.git crimson/os/seastore/object_data_handler: add logic for handling object data Signed-off-by: Samuel Just --- diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt index a28fd55131be..9b50b345adf4 100644 --- a/src/crimson/os/seastore/CMakeLists.txt +++ b/src/crimson/os/seastore/CMakeLists.txt @@ -30,6 +30,7 @@ add_library(crimson-seastore STATIC collection_manager.cc collection_manager/flat_collection_manager.cc collection_manager/collection_flat_node.cc + object_data_handler.cc seastore.cc ../../../test/crimson/seastore/test_block.cc ${PROJECT_SOURCE_DIR}/src/os/Transaction.cc diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index bcf2e5165e02..4745b7d14f15 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -8,6 +8,7 @@ #include "crimson/os/seastore/collection_manager/collection_flat_node.h" #include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h" #include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h" +#include "crimson/os/seastore/object_data_handler.h" #include "crimson/os/seastore/collection_manager/collection_flat_node.h" #include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h" #include "test/crimson/seastore/test_block.h" @@ -149,6 +150,8 @@ CachedExtentRef Cache::alloc_new_extent_by_type( return alloc_new_extent(t, length); case extent_types_t::COLL_BLOCK: return alloc_new_extent(t, length); + case extent_types_t::OBJECT_DATA_BLOCK: + return alloc_new_extent(t, length); case extent_types_t::TEST_BLOCK: return alloc_new_extent(t, length); case extent_types_t::TEST_BLOCK_PHYSICAL: @@ 
-544,6 +547,11 @@ Cache::get_extent_ertr::future Cache::get_extent_by_type( ).safe_then([](auto extent) { return CachedExtentRef(extent.detach(), false /* add_ref */); }); + case extent_types_t::OBJECT_DATA_BLOCK: + return get_extent(offset, length + ).safe_then([](auto extent) { + return CachedExtentRef(extent.detach(), false /* add_ref */); + }); case extent_types_t::TEST_BLOCK: return get_extent(offset, length ).safe_then([](auto extent) { diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc new file mode 100644 index 000000000000..59f2f5462a94 --- /dev/null +++ b/src/crimson/os/seastore/object_data_handler.cc @@ -0,0 +1,536 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include + +#include "crimson/common/log.h" + +#include "crimson/os/seastore/object_data_handler.h" + +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } +} + +namespace crimson::os::seastore { + +/** + * MAX_OBJECT_SIZE + * + * For now, we allocate a fixed region of laddr space of size MAX_OBJECT_SIZE + * for any object. In the future, once we have the ability to remap logical + * mappings (necessary for clone), we'll add the ability to grow and shrink + * these regions and remove this assumption. + */ +static constexpr extent_len_t MAX_OBJECT_SIZE = 16<<20; +#define assert_aligned(x) ceph_assert(((x)%ctx.tm.get_block_size()) == 0) + +using context_t = ObjectDataHandler::context_t; +using get_ertr = ObjectDataHandler::write_ertr; + +auto read_pin( + context_t ctx, + LBAPinRef pin) { + return ctx.tm.pin_to_extent( + ctx.t, + std::move(pin) + ).handle_error( + get_ertr::pass_further{}, + crimson::ct_error::assert_all{ "read_pin: invalid error" } + ); +} + +/** + * extent_to_write_t + * + * Encapsulates extents to be written out using do_insertions. 
+ * Indicates a zero extent or a data extent based on whether
+ * to_write is populated.
+ */
+struct extent_to_write_t {
+  laddr_t addr = L_ADDR_NULL;
+  // Initialized so a default-constructed entry never carries an
+  // indeterminate length.
+  extent_len_t len = 0;
+  std::optional<bufferlist> to_write;
+
+  extent_to_write_t() = default;
+  extent_to_write_t(const extent_to_write_t &) = default;
+  extent_to_write_t(extent_to_write_t &&) = default;
+
+  /// Data extent: len is taken from the buffer to be written
+  extent_to_write_t(laddr_t addr, bufferlist to_write)
+    : addr(addr), len(to_write.length()), to_write(to_write) {}
+
+  /// Zero extent: to_write is left empty
+  extent_to_write_t(laddr_t addr, extent_len_t len)
+    : addr(addr), len(len) {}
+};
+using extent_to_write_list_t = std::list<extent_to_write_t>;
+
+/// Removes extents/mappings in pins (one dec_ref per pin, in list order)
+ObjectDataHandler::write_ret do_removals(
+  context_t ctx,
+  lba_pin_list_t &pins)
+{
+  return crimson::do_for_each(
+    pins.begin(),
+    pins.end(),
+    [ctx](auto &pin) {
+      return ctx.tm.dec_ref(
+        ctx.t,
+        pin->get_laddr()
+      ).safe_then(
+        [](auto){},
+        ObjectDataHandler::write_ertr::pass_further{},
+        crimson::ct_error::assert_all{
+          "object_data_handler::do_removals invalid error"
+        }
+      );
+    });
+}
+
+/// Creates zero/data extents in to_write
+ObjectDataHandler::write_ret do_insertions(
+  context_t ctx,
+  extent_to_write_list_t &to_write)
+{
+  return crimson::do_for_each(
+    to_write.begin(),
+    to_write.end(),
+    [ctx](auto &region) {
+      if (region.to_write) {
+        // Data extent: allocate at the exact laddr and copy the payload in
+        assert_aligned(region.addr);
+        assert_aligned(region.len);
+        ceph_assert(region.len == region.to_write->length());
+        return ctx.tm.alloc_extent<ObjectDataBlock>(
+          ctx.t,
+          region.addr,
+          region.len
+        ).safe_then([ctx, &region](auto extent) {
+          if (extent->get_laddr() != region.addr) {
+            logger().debug(
+              "object_data_handler::do_insertions alloc got addr {},"
+              " should have been {}",
+              extent->get_laddr(),
+              region.addr);
+          }
+          ceph_assert(extent->get_laddr() == region.addr);
+          ceph_assert(extent->get_length() == region.len);
+          auto iter = region.to_write->cbegin();
+          iter.copy(region.len, extent->get_bptr().c_str());
+          return ObjectDataHandler::write_ertr::now();
+        });
+      } else {
+        // Zero extent: only a reservation is recorded
+        return ctx.tm.reserve_region(
+          ctx.t,
+          region.addr,
+          region.len
+        ).safe_then([&region](auto pin) {
+          ceph_assert(pin->get_length() == region.len);
+          ceph_assert(pin->get_laddr() == region.addr);
+          return ObjectDataHandler::write_ertr::now();
+        });
+      }
+    });
+}
+
+/**
+ * split_pin_left
+ *
+ * Splits the passed pin returning aligned extent to be rewritten
+ * to the left (if a zero extent), tail to be prepended to write
+ * beginning at offset.  See below for details.
+ *
+ * The result is a pair: first is the optional zero extent that remains to
+ * the left, second is the optional buffer to prepend to the write.
+ */
+using split_ret_bare = std::pair<
+  std::optional<extent_to_write_t>,
+  std::optional<bufferptr>>;
+using split_ret = get_ertr::future<split_ret_bare>;
+split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset)
+{
+  const auto pin_offset = pin->get_laddr();
+  assert_aligned(pin_offset);
+  ceph_assert(offset >= pin_offset);
+  if (offset == pin_offset) {
+    // Aligned, no tail and no extra extent
+    return get_ertr::make_ready_future<split_ret_bare>(
+      std::nullopt,
+      std::nullopt);
+  } else if (pin->get_paddr().is_zero()) {
+    /* Zero extent unaligned, return largest aligned zero extent to
+     * the left and the gap between aligned_offset and offset to prepend. */
+    auto aligned_offset = p2align(offset, (uint64_t)ctx.tm.get_block_size());
+    assert_aligned(aligned_offset);
+    ceph_assert(aligned_offset <= offset);
+    auto zero_extent_len = aligned_offset - pin_offset;
+    assert_aligned(zero_extent_len);
+    auto zero_prepend_len = offset - aligned_offset;
+    return get_ertr::make_ready_future<split_ret_bare>(
+      (zero_extent_len == 0
+       ?
std::nullopt
+       : std::make_optional(extent_to_write_t(pin_offset, zero_extent_len))),
+      bufferptr(ceph::buffer::create(zero_prepend_len, 0))
+    );
+  } else {
+    // Data, return up to offset to prepend
+    auto to_prepend = offset - pin->get_laddr();
+    return read_pin(ctx, pin->duplicate()
+    ).safe_then([to_prepend](auto extent) {
+      return get_ertr::make_ready_future<split_ret_bare>(
+        std::nullopt,
+        bufferptr(extent->get_bptr(), 0, to_prepend));
+    });
+  }
+};
+
+/// Reverse of split_pin_left: returns the aligned zero extent remaining to
+/// the right of end (zero pins only) and the bytes to append to the write.
+split_ret split_pin_right(context_t ctx, LBAPinRef &pin, laddr_t end)
+{
+  const auto pin_begin = pin->get_laddr();
+  const auto pin_end = pin->get_laddr() + pin->get_length();
+  assert_aligned(pin_end);
+  ceph_assert(pin_end >= end);
+  if (end == pin_end) {
+    // Write ends exactly on the pin boundary, nothing to append
+    return get_ertr::make_ready_future<split_ret_bare>(
+      std::nullopt,
+      std::nullopt);
+  } else if (pin->get_paddr().is_zero()) {
+    // Zero pin: pad the write up to the next block boundary with zeroes and
+    // keep whatever aligned zero extent is left beyond it
+    auto aligned_end = p2roundup(end, (uint64_t)ctx.tm.get_block_size());
+    assert_aligned(aligned_end);
+    ceph_assert(aligned_end >= end);
+    auto zero_suffix_len = aligned_end - end;
+    auto zero_extent_len = pin_end - aligned_end;
+    assert_aligned(zero_extent_len);
+    return get_ertr::make_ready_future<split_ret_bare>(
+      (zero_extent_len == 0
+       ?
std::nullopt
+       : std::make_optional(extent_to_write_t(aligned_end, zero_extent_len))),
+      bufferptr(ceph::buffer::create(zero_suffix_len, 0))
+    );
+  } else {
+    // Data pin: append the existing bytes in [end, pin_end)
+    return read_pin(ctx, pin->duplicate()
+    ).safe_then([end, pin_begin, pin_end](auto extent) {
+      return get_ertr::make_ready_future<split_ret_bare>(
+        std::nullopt,
+        bufferptr(
+          extent->get_bptr(),
+          end - pin_begin,
+          pin_end - end));
+    });
+  }
+};
+
+/// Invokes f on a working copy of the onode's object_data and writes the
+/// copy back into the onode layout only if f marked it as needing an update.
+template <typename F>
+auto with_object_data(
+  ObjectDataHandler::context_t ctx,
+  F &&f)
+{
+  return seastar::do_with(
+    ctx.onode.get_layout().object_data.get(),
+    std::forward<F>(f),
+    [ctx](auto &object_data, auto &f) {
+      return std::invoke(f, object_data
+      ).safe_then([ctx, &object_data] {
+        if (object_data.must_update()) {
+          ctx.onode.get_mutable_layout(ctx.t).object_data.update(object_data);
+        }
+        return seastar::now();
+      });
+    });
+}
+
+ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
+  context_t ctx,
+  object_data_t &object_data,
+  extent_len_t size)
+{
+  ceph_assert(size <= MAX_OBJECT_SIZE);
+  if (!object_data.is_null()) {
+    // Already reserved: the region is always the fixed MAX_OBJECT_SIZE
+    ceph_assert(object_data.get_reserved_data_len() == MAX_OBJECT_SIZE);
+    return write_ertr::now();
+  } else {
+    return ctx.tm.reserve_region(
+      ctx.t,
+      0 /* TODO -- pass hint based on object hash */,
+      MAX_OBJECT_SIZE
+    ).safe_then([size, &object_data](auto pin) {
+      ceph_assert(pin->get_length() == MAX_OBJECT_SIZE);
+      object_data.update_reserved(
+        pin->get_laddr(),
+        pin->get_length());
+      return write_ertr::now();
+    });
+  }
+}
+
+ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation(
+  context_t ctx, object_data_t &object_data, extent_len_t size)
+{
+  ceph_assert(!object_data.is_null());
+  assert_aligned(size);
+  ceph_assert(size <= object_data.get_reserved_data_len());
+  return seastar::do_with(
+    lba_pin_list_t(),
+    extent_to_write_list_t(),
+    [this, ctx, size, &object_data](auto &pins, auto &to_write) {
+      return ctx.tm.get_pins(
+        ctx.t,
+        object_data.get_reserved_data_base() + size,
+        object_data.get_reserved_data_len() - size
+      ).safe_then([this, ctx, size, &pins, &object_data, &to_write](auto _pins) {
+        _pins.swap(pins);
+        ceph_assert(pins.size());
+        auto &pin = *pins.front();
+        ceph_assert(pin.get_laddr() >= object_data.get_reserved_data_base());
+        ceph_assert(
+          pin.get_laddr() <= object_data.get_reserved_data_base() + size);
+        auto pin_offset = pin.get_laddr() -
+          object_data.get_reserved_data_base();
+        if ((pin.get_laddr() == object_data.get_reserved_data_base() + size) ||
+            pin.get_paddr().is_zero()) {
+          /* First pin either starts exactly at the trim boundary or is a
+           * zero extent.  Nothing in it has to be preserved, so a single
+           * zero extent running to the end of the reservation replaces all
+           * pins.  Without the boundary check a data pin starting at size
+           * would be rewritten as a zero-length data extent at the same
+           * laddr as the zero extent queued right after it. */
+          to_write.emplace_back(
+            pin.get_laddr(),
+            object_data.get_reserved_data_len() - pin_offset);
+          return clear_ertr::now();
+        } else {
+          /* First pin holds data and straddles the boundary: keep its
+           * first size - pin_offset bytes, zero everything after size. */
+          return read_pin(
+            ctx,
+            pin.duplicate()
+          ).safe_then([ctx, size, pin_offset, &pin, &object_data, &to_write](
+            auto extent) {
+            bufferlist bl;
+            bl.append(
+              bufferptr(
+                extent->get_bptr(),
+                0,
+                size - pin_offset
+              ));
+            to_write.emplace_back(
+              pin.get_laddr(),
+              bl);
+            to_write.emplace_back(
+              object_data.get_reserved_data_base() + size,
+              object_data.get_reserved_data_len() - size);
+            return clear_ertr::now();
+          });
+        }
+      }).safe_then([ctx, size, &pins] {
+        return do_removals(ctx, pins);
+      }).safe_then([ctx, size, &to_write] {
+        return do_insertions(ctx, to_write);
+      }).safe_then([size, &object_data] {
+        if (size == 0) {
+          object_data.clear();
+        }
+        return ObjectDataHandler::clear_ertr::now();
+      });
+    });
+}
+
+/**
+ * get_buffers
+ *
+ * Returns extent_to_write_t's from bl.
+ *
+ * TODO: probably add some kind of upper limit on extent size.
+ */
+extent_to_write_list_t get_buffers(laddr_t offset, bufferlist &bl)
+{
+  auto ret = extent_to_write_list_t();
+  ret.emplace_back(offset, bl);
+  return ret;
+};
+
+ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
+  context_t ctx,
+  laddr_t _offset,
+  bufferlist &&bl,
+  lba_pin_list_t &&_pins)
+{
+  return seastar::do_with(
+    _offset,
+    std::move(bl),
+    std::move(_pins),
+    extent_to_write_list_t(),
+    [this, ctx](laddr_t &offset, auto &bl, auto &pins, auto &to_write) {
+      ceph_assert(pins.size() >= 1);
+      auto pin_begin = pins.front()->get_laddr();
+      ceph_assert(pin_begin <= offset);
+      auto pin_end = pins.back()->get_laddr() + pins.back()->get_length();
+      // The pins must cover the whole write, i.e. up to offset + length
+      ceph_assert(pin_end >= (offset + bl.length()));
+
+      return split_pin_left(
+        ctx,
+        pins.front(),
+        offset
+      ).safe_then([this, ctx, pin_begin, &offset, &bl, &pins, &to_write](
+        auto p) {
+        auto &[left_extent, headptr] = p;
+        if (left_extent) {
+          ceph_assert(left_extent->addr == pin_begin);
+          to_write.push_front(std::move(*left_extent));
+        }
+        if (headptr) {
+          // Prepend the preserved head so the write starts block aligned
+          bufferlist newbl;
+          newbl.append(*headptr);
+          newbl.append(bl);
+          bl.swap(newbl);
+          offset -= headptr->length();
+          assert_aligned(offset);
+        }
+        return split_pin_right(
+          ctx,
+          pins.back(),
+          offset + bl.length());
+      }).safe_then([this, ctx, pin_end, &offset, &bl, &pins, &to_write](
+        auto p) {
+        auto &[right_extent, tailptr] = p;
+        if (tailptr) {
+          // Append the preserved tail so the write ends block aligned
+          bl.append(*tailptr);
+          assert_aligned(bl.length());
+        }
+        to_write.splice(to_write.end(), get_buffers(offset, bl));
+        if (right_extent) {
+          ceph_assert((right_extent->addr + right_extent->len) == pin_end);
+          to_write.push_back(std::move(*right_extent));
+        }
+        return write_ertr::now();
+      }).safe_then([this, ctx, &pins] {
+        return do_removals(ctx, pins);
+      }).safe_then([this, ctx, &to_write] {
+        return do_insertions(ctx, to_write);
+      });
+    });
+}
+
+ObjectDataHandler::write_ret ObjectDataHandler::write(
+  context_t ctx,
+  objaddr_t offset,
+  const bufferlist &bl)
+{
+  return with_object_data(
+    ctx,
+    [this, ctx, offset, &bl](auto &object_data) {
+      return prepare_data_reservation(
+        ctx,
+        object_data,
+        p2roundup(offset + bl.length(), ctx.tm.get_block_size())
+      ).safe_then([this, ctx, offset, &object_data, &bl] {
+        auto logical_offset = object_data.get_reserved_data_base() + offset;
+        return ctx.tm.get_pins(
+          ctx.t,
+          logical_offset,
+          bl.length()
+        ).safe_then([this, ctx, offset, logical_offset, &object_data, &bl](
+          auto pins) {
+          return overwrite(ctx, logical_offset, bufferlist(bl), std::move(pins));
+        });
+      });
+    });
+}
+
+ObjectDataHandler::read_ret ObjectDataHandler::read(
+  context_t ctx,
+  objaddr_t obj_offset,
+  extent_len_t len)
+{
+  return seastar::do_with(
+    bufferlist(),
+    [this, ctx, obj_offset, len](auto &ret) {
+      return with_object_data(
+        ctx,
+        [this, ctx, obj_offset, len, &ret](const auto &object_data) {
+          /* Assumption: callers ensure that onode size is <= reserved
+           * size and that len is adjusted here prior to call */
+          ceph_assert(!object_data.is_null());
+          ceph_assert((obj_offset + len) <= object_data.get_reserved_data_len());
+          ceph_assert(len > 0);
+          laddr_t loffset =
+            object_data.get_reserved_data_base() + obj_offset;
+          return ctx.tm.get_pins(
+            ctx.t,
+            loffset,
+            len
+          ).safe_then([this, ctx, loffset, len, &ret](auto _pins) {
+            // offset~len falls within reserved region and len > 0
+            ceph_assert(_pins.size() >= 1);
+            ceph_assert((*_pins.begin())->get_laddr() <= loffset);
+            // current tracks how far into [loffset, loffset + len) ret
+            // has been filled so far
+            return seastar::do_with(
+              std::move(_pins),
+              loffset,
+              [this, ctx, loffset, len, &ret](auto &pins, auto &current) {
+                return crimson::do_for_each(
+                  std::begin(pins),
+                  std::end(pins),
+                  [this, ctx, loffset, len, &current, &ret](auto &pin)
+                  -> read_ertr::future<> {
+                    ceph_assert(current <= (loffset + len));
+                    ceph_assert(
+                      (loffset + len) > pin->get_laddr());
+                    laddr_t end = std::min(
+                      pin->get_laddr() + pin->get_length(),
+                      loffset + len);
+                    if (pin->get_paddr().is_zero()) {
+                      ceph_assert(end > current); // See LBAManager::get_mappings
+                      ret.append_zero(end -
current);
+                      current = end;
+                      return seastar::now();
+                    } else {
+                      // Data pin: append the slice [current, end) of it
+                      return ctx.tm.pin_to_extent<ObjectDataBlock>(
+                        ctx.t,
+                        std::move(pin)
+                      ).safe_then([&ret, &current, end](auto extent) {
+                        ceph_assert(
+                          (extent->get_laddr() + extent->get_length()) >= end);
+                        ceph_assert(end > current);
+                        ret.append(
+                          bufferptr(
+                            extent->get_bptr(),
+                            current - extent->get_laddr(),
+                            end - current));
+                        current = end;
+                        return seastar::now();
+                      }).handle_error(
+                        read_ertr::pass_further{},
+                        crimson::ct_error::assert_all{
+                          "ObjectDataHandler::read hit invalid error"
+                        }
+                      );
+                    }
+                  });
+              });
+          });
+      }).safe_then([&ret] {
+        return std::move(ret);
+      });
+    });
+}
+
+ObjectDataHandler::truncate_ret ObjectDataHandler::truncate(
+  context_t ctx,
+  objaddr_t offset)
+{
+  return with_object_data(
+    ctx,
+    [this, ctx, offset](auto &object_data) {
+      // offset is compared against the reserved length, not the object size
+      if (offset < object_data.get_reserved_data_len()) {
+        return trim_data_reservation(ctx, object_data, offset);
+      } else if (offset > object_data.get_reserved_data_len()) {
+        return prepare_data_reservation(
+          ctx,
+          object_data,
+          offset);
+      } else {
+        return truncate_ertr::now();
+      }
+    });
+}
+
+ObjectDataHandler::clear_ret ObjectDataHandler::clear(
+  context_t ctx)
+{
+  return with_object_data(
+    ctx,
+    [this, ctx](auto &object_data) {
+      return trim_data_reservation(ctx, object_data, 0);
+    });
+}
+
+}
diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h
new file mode 100644
index 000000000000..c7dc078fc4b5
--- /dev/null
+++ b/src/crimson/os/seastore/object_data_handler.h
@@ -0,0 +1,109 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include
+#include
+
+#include "include/buffer.h"
+
+#include "test/crimson/seastore/test_block.h" // TODO
+
+#include "crimson/os/seastore/onode.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/transaction.h"
+
+namespace crimson::os::seastore {
+
+struct ObjectDataBlock : 
crimson::os::seastore::LogicalCachedExtent {
+  using Ref = TCachedExtentRef<ObjectDataBlock>;
+
+  ObjectDataBlock(ceph::bufferptr &&ptr)
+    : LogicalCachedExtent(std::move(ptr)) {}
+  ObjectDataBlock(const ObjectDataBlock &other)
+    : LogicalCachedExtent(other) {}
+
+  CachedExtentRef duplicate_for_write() final {
+    return CachedExtentRef(new ObjectDataBlock(*this));
+  };
+
+  static constexpr extent_types_t TYPE = extent_types_t::OBJECT_DATA_BLOCK;
+  extent_types_t get_type() const final {
+    return TYPE;
+  }
+
+  ceph::bufferlist get_delta() final {
+    /* Currently, we always allocate fresh ObjectDataBlock's rather than
+     * mutating existing ones. */
+    ceph_assert(0 == "Should be impossible");
+    // Not reached; keeps every path of this non-void function returning
+    return ceph::bufferlist();
+  }
+
+  void apply_delta(const ceph::bufferlist &bl) final {
+    // See get_delta()
+    ceph_assert(0 == "Should be impossible");
+  }
+};
+using ObjectDataBlockRef = TCachedExtentRef<ObjectDataBlock>;
+
+class ObjectDataHandler {
+public:
+  using base_ertr = TransactionManager::base_ertr;
+
+  /// References to the state every operation works on
+  struct context_t {
+    TransactionManager &tm;
+    Transaction &t;
+    Onode &onode;
+  };
+
+  /// Writes bl to [offset, offset + bl.length())
+  using write_ertr = base_ertr;
+  using write_ret = write_ertr::future<>;
+  write_ret write(
+    context_t ctx,
+    objaddr_t offset,
+    const bufferlist &bl);
+
+  /// Reads data in [offset, offset + len)
+  using read_ertr = base_ertr;
+  using read_ret = read_ertr::future<bufferlist>;
+  read_ret read(
+    context_t ctx,
+    objaddr_t offset,
+    extent_len_t len);
+
+  /// Clears data past offset
+  using truncate_ertr = base_ertr;
+  using truncate_ret = truncate_ertr::future<>;
+  truncate_ret truncate(
+    context_t ctx,
+    objaddr_t offset);
+
+  /// Clears data and reservation
+  using clear_ertr = base_ertr;
+  using clear_ret = clear_ertr::future<>;
+  clear_ret clear(context_t ctx);
+
+private:
+  /// Updates region [offset, offset + bl.length()) to bl
+  write_ret overwrite(
+    context_t ctx,        ///< [in] ctx
+    laddr_t offset,       ///< [in] write offset
+    bufferlist &&bl,      ///< [in] buffer to write
+    lba_pin_list_t &&pins ///< [in] 
set of pins overlapping above region + ); + + /// Ensures object_data reserved region is prepared + write_ret prepare_data_reservation( + context_t ctx, + object_data_t &object_data, + extent_len_t size); + + /// Trims data past size + clear_ret trim_data_reservation( + context_t ctx, + object_data_t &object_data, + extent_len_t size); +}; + +} diff --git a/src/crimson/os/seastore/onode.h b/src/crimson/os/seastore/onode.h index 3b9dd383a90b..21c0fba8aa55 100644 --- a/src/crimson/os/seastore/onode.h +++ b/src/crimson/os/seastore/onode.h @@ -16,6 +16,8 @@ namespace crimson::os::seastore { struct onode_layout_t { ceph_le32 size{0}; omap_root_le_t omap_root; + + object_data_le_t object_data; } __attribute__((packed)); class Transaction; diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index f5a5debbb6c0..a651d23cd909 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -63,6 +63,8 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t) return out << "OMAP_LEAF"; case extent_types_t::COLL_BLOCK: return out << "COLL_BLOCK"; + case extent_types_t::OBJECT_DATA_BLOCK: + return out << "OBJECT_DATA_BLOCK"; case extent_types_t::TEST_BLOCK: return out << "TEST_BLOCK"; case extent_types_t::TEST_BLOCK_PHYSICAL: diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index d6c78a400a3f..de6485ba4b01 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -316,6 +316,7 @@ enum class extent_types_t : uint8_t { OMAP_LEAF = 5, ONODE_BLOCK_STAGED = 6, COLL_BLOCK = 7, + OBJECT_DATA_BLOCK = 8, // Test Block Types TEST_BLOCK = 0xF0, @@ -395,6 +396,71 @@ struct record_t { std::vector deltas; }; +class object_data_t { + laddr_t reserved_data_base = L_ADDR_NULL; + extent_len_t reserved_data_len = 0; + + bool dirty = false; +public: + object_data_t( + laddr_t reserved_data_base, + extent_len_t 
reserved_data_len) + : reserved_data_base(reserved_data_base), + reserved_data_len(reserved_data_len) {} + + laddr_t get_reserved_data_base() const { + return reserved_data_base; + } + + extent_len_t get_reserved_data_len() const { + return reserved_data_len; + } + + bool is_null() const { + return reserved_data_base == L_ADDR_NULL; + } + + bool must_update() const { + return dirty; + } + + void update_reserved( + laddr_t base, + extent_len_t len) { + dirty = true; + reserved_data_base = base; + reserved_data_len = len; + } + + void update_len( + extent_len_t len) { + dirty = true; + reserved_data_len = len; + } + + void clear() { + dirty = true; + reserved_data_base = L_ADDR_NULL; + reserved_data_len = 0; + } +}; + +struct __attribute__((packed)) object_data_le_t { + laddr_le_t reserved_data_base = laddr_le_t(L_ADDR_NULL); + extent_len_le_t reserved_data_len = init_extent_len_le(0); + + void update(const object_data_t &nroot) { + reserved_data_base = nroot.get_reserved_data_base(); + reserved_data_len = init_extent_len_le(nroot.get_reserved_data_len()); + } + + object_data_t get() const { + return object_data_t( + reserved_data_base, + reserved_data_len); + } +}; + struct omap_root_t { laddr_t addr = L_ADDR_NULL; depth_t depth = 0; diff --git a/src/test/crimson/seastore/CMakeLists.txt b/src/test/crimson/seastore/CMakeLists.txt index 153a3fbcd12f..f1585bf0ec08 100644 --- a/src/test/crimson/seastore/CMakeLists.txt +++ b/src/test/crimson/seastore/CMakeLists.txt @@ -38,6 +38,18 @@ target_link_libraries( crimson::gtest crimson-seastore) +add_executable(unittest-object-data-handler + test_object_data_handler.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest-object-data-handler + --memory 256M --smp 1) +target_link_libraries( + unittest-object-data-handler + crimson::gtest + crimson-seastore + crimson-os + crimson-common) + add_executable(unittest-collection-manager test_collection_manager.cc ../gtest_seastar.cc) diff --git 
a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc new file mode 100644 index 000000000000..45e941ea8d20 --- /dev/null +++ b/src/test/crimson/seastore/test_object_data_handler.cc @@ -0,0 +1,300 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/onode.h" +#include "crimson/os/seastore/object_data_handler.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +class TestOnode : public Onode { + onode_layout_t layout; + bool dirty = false; + +public: + const onode_layout_t &get_layout() const final { + return layout; + } + onode_layout_t &get_mutable_layout(Transaction &t) final { + dirty = true; + return layout; + } + bool is_dirty() const { return dirty; } + ~TestOnode() final = default; +}; + +struct object_data_handler_test_t: + public seastar_test_suite_t, + TMTestState { + OnodeRef onode; + + bufferptr known_contents; + extent_len_t size = 0; + + object_data_handler_test_t() {} + + auto submit_transaction(TransactionRef &&t) { + return tm->submit_transaction(std::move(t) + ).safe_then([this] { + return segment_cleaner->run_until_halt(); + }); + } + + void write(Transaction &t, objaddr_t offset, extent_len_t len, char fill) { + ceph_assert(offset + len <= known_contents.length()); + size = std::max(size, offset + len); + memset( + known_contents.c_str() + offset, + fill, + len); + bufferlist bl; + bl.append( + bufferptr( + known_contents, + offset, + len)); + return ObjectDataHandler().write( + ObjectDataHandler::context_t{ + *tm, + t, + *onode, + }, + offset, + bl).unsafe_get0(); + } + void write(objaddr_t offset, extent_len_t len, char fill) { + 
auto t = tm->create_transaction(); + write(*t, offset, len, fill); + return submit_transaction(std::move(t)).unsafe_get0(); + } + + void truncate(Transaction &t, objaddr_t offset) { + if (size > offset) { + memset( + known_contents.c_str() + offset, + 0, + size - offset); + ObjectDataHandler().truncate( + ObjectDataHandler::context_t{ + *tm, + t, + *onode + }, + offset).unsafe_get0(); + } + size = offset; + } + void truncate(objaddr_t offset) { + auto t = tm->create_transaction(); + truncate(*t, offset); + return submit_transaction(std::move(t)).unsafe_get0(); + } + + void read(Transaction &t, objaddr_t offset, extent_len_t len) { + bufferlist bl = ObjectDataHandler().read( + ObjectDataHandler::context_t{ + *tm, + t, + *onode + }, + offset, + len).unsafe_get0(); + bufferlist known; + known.append( + bufferptr( + known_contents, + offset, + len)); + EXPECT_EQ(bl.length(), known.length()); + EXPECT_EQ(bl, known); + } + void read(objaddr_t offset, extent_len_t len) { + auto t = tm->create_transaction(); + read(*t, offset, len); + } + void read_near(objaddr_t offset, extent_len_t len, extent_len_t fuzz) { + auto fuzzes = std::vector{-1 * (int32_t)fuzz, 0, (int32_t)fuzz}; + for (auto left_fuzz : fuzzes) { + for (auto right_fuzz : fuzzes) { + read(offset + left_fuzz, len - left_fuzz + right_fuzz); + } + } + } + + seastar::future<> set_up_fut() final { + onode = new TestOnode{}; + known_contents = buffer::create(4<<20 /* 4MB */); + size = 0; + return tm_setup(); + } + + seastar::future<> tear_down_fut() final { + onode.reset(); + size = 0; + return tm_teardown(); + } +}; + +TEST_F(object_data_handler_test_t, single_write) +{ + run_async([this] { + write(1<<20, 8<<10, 'c'); + + read_near(1<<20, 8<<10, 1); + read_near(1<<20, 8<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, multi_write) +{ + run_async([this] { + write((1<<20) - (4<<10), 4<<10, 'a'); + write(1<<20, 4<<10, 'b'); + write((1<<20) + (4<<10), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + 
read_near(1<<20, 4<<10, 512); + + read_near((1<<20)-(4<<10), 12<<10, 1); + read_near((1<<20)-(4<<10), 12<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, write_hole) +{ + run_async([this] { + write((1<<20) - (4<<10), 4<<10, 'a'); + // hole at 1<<20 + write((1<<20) + (4<<10), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20)-(4<<10), 12<<10, 1); + read_near((1<<20)-(4<<10), 12<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, overwrite_single) +{ + run_async([this] { + write((1<<20), 4<<10, 'a'); + write((1<<20), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, overwrite_double) +{ + run_async([this] { + write((1<<20), 4<<10, 'a'); + write((1<<20)+(4<<10), 4<<10, 'c'); + write((1<<20), 8<<10, 'b'); + + read_near(1<<20, 8<<10, 1); + read_near(1<<20, 8<<10, 512); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20) + (4<<10), 4<<10, 1); + read_near((1<<20) + (4<<10), 4<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, overwrite_partial) +{ + run_async([this] { + write((1<<20), 12<<10, 'a'); + read_near(1<<20, 12<<10, 1); + + write((1<<20)+(8<<10), 4<<10, 'b'); + read_near(1<<20, 12<<10, 1); + + write((1<<20)+(4<<10), 4<<10, 'c'); + read_near(1<<20, 12<<10, 1); + + write((1<<20), 4<<10, 'd'); + + read_near(1<<20, 12<<10, 1); + read_near(1<<20, 12<<10, 512); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20) + (4<<10), 4<<10, 1); + read_near((1<<20) + (4<<10), 4<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, unaligned_write) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, (4<<10)+(1<<10), 'a'); + read_near(base-(4<<10), 12<<10, 512); + + base = (1<<20) + (64<<10); + write(base+(1<<10), (4<<10)+(1<<10), 'b'); + read_near(base-(4<<10), 12<<10, 512); + + base = (1<<20) + (128<<10); + write(base-(1<<10), (4<<10)+(2<<20), 'c'); + 
read_near(base-(4<<10), 12<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, unaligned_overwrite) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, (128<<10) + (16<<10), 'x'); + + write(base, (4<<10)+(1<<10), 'a'); + read_near(base-(4<<10), 12<<10, 2<<10); + + base = (1<<20) + (64<<10); + write(base+(1<<10), (4<<10)+(1<<10), 'b'); + read_near(base-(4<<10), 12<<10, 2<<10); + + base = (1<<20) + (128<<10); + write(base-(1<<10), (4<<10)+(2<<20), 'c'); + read_near(base-(4<<10), 12<<10, 2<<10); + + read(base, (128<<10) + (16<<10)); + }); +} + +TEST_F(object_data_handler_test_t, truncate) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, 8<<10, 'a'); + write(base+(8<<10), 8<<10, 'b'); + write(base+(16<<10), 8<<10, 'c'); + + truncate(base + (32<<10)); + read(base, 64<<10); + + truncate(base + (24<<10)); + read(base, 64<<10); + + truncate(base + (12<<10)); + read(base, 64<<10); + + truncate(base - (12<<10)); + read(base, 64<<10); + }); +}