From 86107a4af0330e584384f68bf27048b83f4fc63f Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Wed, 13 Aug 2025 11:49:23 +0800 Subject: [PATCH] crimson/os/seastore: do "copy_on_write" if the to-be-modified object needs it At present, only clone objects may need COW, as HEAD objects won't be sharing any direct lba mapping with other objects in non-recovery scenarios. Although the HEAD object may share its direct mappings with the temp object that's going to be recovered and replace it, it won't be accepting any modifications at that time. Signed-off-by: Xuehan Xu --- .../os/seastore/object_data_handler.cc | 30 ++++++++ src/crimson/os/seastore/object_data_handler.h | 2 + src/crimson/os/seastore/seastore.cc | 77 ++++++++++++------- 3 files changed, 80 insertions(+), 29 deletions(-) diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc index 4fb7fb98b72a2..e99a56bcbdb8d 100644 --- a/src/crimson/os/seastore/object_data_handler.cc +++ b/src/crimson/os/seastore/object_data_handler.cc @@ -1383,6 +1383,36 @@ ObjectDataHandler::clear_ret ObjectDataHandler::clear( }); } +ObjectDataHandler::clone_ret +ObjectDataHandler::copy_on_write( + context_t ctx) /* copy_on_write: fetches the pin at the current reserved data base, runs do_clone() from object_data into a fresh null object_data_t, repoints object_data at the new reserved base/len, calls unset_need_cow() on the onode, then removes the mappings in the old range. */ +{ + return with_object_data( + ctx, + [ctx, this](auto &object_data) -> clone_iertr::future<> { + auto mapping = co_await ctx.tm.get_pin( + ctx.t, object_data.get_reserved_data_base() + ).handle_error_interruptible( + clone_iertr::pass_further{}, + crimson::ct_error::assert_all{"unexpected enoent"} + ); /* NOTE(review): going by the handler names, clone_iertr errors are passed on and any other error asserts -- confirm */ + object_data_t d_object_data = get_null_object_data(); + co_await do_clone(ctx, object_data, d_object_data, mapping, false); /* NOTE(review): do_clone() presumably populates d_object_data; the meaning of the trailing false is not visible in this patch -- confirm */ + auto old_base = object_data.get_reserved_data_base(); + auto old_len = object_data.get_reserved_data_len(); /* old range is read here, before update_reserved() replaces it, so it can be removed below */ + object_data.update_reserved( + d_object_data.get_reserved_data_base(), + d_object_data.get_reserved_data_len()); + ctx.onode.unset_need_cow(ctx.t); + co_await ctx.tm.remove_mappings_in_range( + ctx.t, old_base, old_len, std::move(mapping), 
{false, true} /* NOTE(review): the meaning of these two flags is not visible in this patch -- confirm against remove_mappings_in_range() */ + ).handle_error_interruptible( + clone_iertr::pass_further{}, + crimson::ct_error::assert_all{"unexpected enoent"} + ).discard_result(); + }); +} + ObjectDataHandler::clone_ret ObjectDataHandler::do_clone( context_t ctx, diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h index 0438acff4c01c..204a72e11e96a 100644 --- a/src/crimson/os/seastore/object_data_handler.h +++ b/src/crimson/os/seastore/object_data_handler.h @@ -331,6 +331,8 @@ public: using clone_ret = clone_iertr::future<>; clone_ret clone(context_t ctx); + clone_ret copy_on_write(context_t ctx); /* invoked by the seastore.cc callers in this patch when the onode reports need_cow() */ + private: /// Updates region [_offset, _offset + bl.length) to bl write_ret overwrite( diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index a46010d2c89c1..a5963800b2a1b 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -1891,12 +1891,18 @@ SeaStore::Shard::_remove( ObjectDataHandler(max_object_size), [&onode, this, &ctx](auto &objhandler) { - return objhandler.clear( - ObjectDataHandler::context_t{ - *transaction_manager, - *ctx.transaction, - *onode, - }); + auto fut = ObjectDataHandler::clone_iertr::now(); + auto objctx = ObjectDataHandler::context_t{ + *transaction_manager, + *ctx.transaction, + *onode, + }; + if (onode->need_cow()) { + fut = objhandler.copy_on_write(objctx); + } /* fut stays clone_iertr::now() unless need_cow() is set; clear() is chained after it either way */ + return fut.si_then([&objhandler, objctx] { + return objhandler.clear(objctx); + }); }); }).si_then([this, &ctx, &onode] { return onode_manager->erase_onode(*ctx.transaction, onode); @@ -1934,14 +1940,18 @@ SeaStore::Shard::_write( std::move(_bl), ObjectDataHandler(max_object_size), [=, this, &ctx, &onode](auto &bl, auto &objhandler) { - return objhandler.write( - ObjectDataHandler::context_t{ - *transaction_manager, - *ctx.transaction, - onode, - }, - offset, - bl); + auto fut = ObjectDataHandler::clone_iertr::now(); + auto objctx = ObjectDataHandler::context_t{ 
*transaction_manager, + *ctx.transaction, + onode, + }; + if (onode.need_cow()) { + fut = objhandler.copy_on_write(objctx); + } /* write() is chained after the optional copy_on_write(); bl and objhandler are captured by reference, objctx and offset by value */ + return fut.si_then([&objhandler, objctx, offset, &bl] { + return objhandler.write(objctx, offset, bl); + }); }); } @@ -2023,14 +2033,18 @@ SeaStore::Shard::_zero( return seastar::do_with( ObjectDataHandler(max_object_size), [=, this, &ctx, &onode](auto &objhandler) { - return objhandler.zero( - ObjectDataHandler::context_t{ - *transaction_manager, - *ctx.transaction, - onode, - }, - offset, - len); + auto fut = ObjectDataHandler::clone_iertr::now(); + auto objctx = ObjectDataHandler::context_t{ + *transaction_manager, + *ctx.transaction, + onode, + }; + if (onode.need_cow()) { + fut = objhandler.copy_on_write(objctx); + } /* zero() is chained after the optional copy_on_write(), same shape as the _write hunk */ + return fut.si_then([&objhandler, objctx, offset, len] { + return objhandler.zero(objctx, offset, len); + }); }); } @@ -2079,13 +2093,18 @@ SeaStore::Shard::_truncate( return seastar::do_with( ObjectDataHandler(max_object_size), [=, this, &ctx, &onode](auto &objhandler) { - return objhandler.truncate( - ObjectDataHandler::context_t{ - *transaction_manager, - *ctx.transaction, - onode - }, - size); + auto fut = ObjectDataHandler::clone_iertr::now(); + auto objctx = ObjectDataHandler::context_t{ + *transaction_manager, + *ctx.transaction, + onode, + }; + if (onode.need_cow()) { + fut = objhandler.copy_on_write(objctx); + } /* truncate() is chained after the optional copy_on_write(), same shape as the _write hunk */ + return fut.si_then([&objhandler, objctx, size] { + return objhandler.truncate(objctx, size); + }); }); } -- 2.39.5