]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: do "copy_on_write" if the to-be-modified object
author Xuehan Xu <xuxuehan@qianxin.com>
Wed, 13 Aug 2025 03:49:23 +0000 (11:49 +0800)
committer Xuehan Xu <xuxuehan@qianxin.com>
Sun, 28 Sep 2025 06:48:59 +0000 (14:48 +0800)
needs it

At present, only clone objects may need COW, because HEAD objects do not
share any direct LBA mappings with other objects in non-recovery
scenarios.

Although the HEAD object may share its direct mappings with the temp
object that is being recovered and will replace it, the HEAD object does
not accept any modifications during that time.

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/object_data_handler.cc
src/crimson/os/seastore/object_data_handler.h
src/crimson/os/seastore/seastore.cc

index 4fb7fb98b72a21810a972b8083e4a3c74a65d084..e99a56bcbdb8dacfc1968004098fc3894c81089f 100644 (file)
@@ -1383,6 +1383,36 @@ ObjectDataHandler::clear_ret ObjectDataHandler::clear(
     });
 }
 
+/// Re-points the onode's object data at a freshly cloned reserved region and
+/// then removes the mappings of the region it used before.
+///
+/// Steps, all run inside with_object_data() on the onode's object_data:
+///   1. look up the pin for the current reserved data base;
+///   2. do_clone() from object_data into a null object_data;
+///   3. switch object_data to the clone's base/len and clear the onode's
+///      need_cow flag;
+///   4. remove the mappings in the old [base, base + len) range.
+///
+/// @param ctx  transaction manager / transaction / onode to operate on
+/// @return clone_iertr future; errors in clone_iertr are passed further,
+///         any other error from get_pin or remove_mappings_in_range asserts.
+ObjectDataHandler::clone_ret
+ObjectDataHandler::copy_on_write(
+  context_t ctx)
+{
+  return with_object_data(
+    ctx,
+    [ctx, this](auto &object_data) -> clone_iertr::future<> {
+    // Pin of the mapping at the start of the currently reserved region.
+    auto mapping = co_await ctx.tm.get_pin(
+      ctx.t, object_data.get_reserved_data_base()
+    ).handle_error_interruptible(
+      clone_iertr::pass_further{},
+      crimson::ct_error::assert_all{"unexpected enoent"}
+    );
+    // Clone into an initially null object_data.
+    // NOTE(review): presumably do_clone fills d_object_data with the new
+    // reserved region; the meaning of the trailing `false` is not visible
+    // here -- confirm against do_clone's signature.
+    object_data_t d_object_data = get_null_object_data();
+    co_await do_clone(ctx, object_data, d_object_data, mapping, false);
+    // Remember the old region before object_data is overwritten below; it is
+    // still needed for the removal at the end.
+    auto old_base = object_data.get_reserved_data_base();
+    auto old_len = object_data.get_reserved_data_len();
+    object_data.update_reserved(
+      d_object_data.get_reserved_data_base(),
+      d_object_data.get_reserved_data_len());
+    ctx.onode.unset_need_cow(ctx.t);
+    // Drop the old region's mappings, starting from the pin obtained above.
+    // NOTE(review): the semantics of the {false, true} argument are not
+    // visible in this diff -- confirm against remove_mappings_in_range.
+    co_await ctx.tm.remove_mappings_in_range(
+      ctx.t, old_base, old_len, std::move(mapping), {false, true}
+    ).handle_error_interruptible(
+      clone_iertr::pass_further{},
+      crimson::ct_error::assert_all{"unexpected enoent"}
+    ).discard_result();
+  });
+}
+
 ObjectDataHandler::clone_ret
 ObjectDataHandler::do_clone(
   context_t ctx,
index 0438acff4c01c94896dc36839ddc06899130309c..204a72e11e96afdcc899a7fd8e2f8ffb13a506d4 100644 (file)
@@ -331,6 +331,8 @@ public:
   using clone_ret = clone_iertr::future<>;
   clone_ret clone(context_t ctx);
 
+  /// Moves the onode's object data onto a newly cloned reserved region and
+  /// clears the onode's need_cow flag; SeaStore calls this before
+  /// modifying or removing an object whose onode reports need_cow().
+  clone_ret copy_on_write(context_t ctx);
+
 private:
   /// Updates region [_offset, _offset + bl.length) to bl
   write_ret overwrite(
index a46010d2c89c196cd822a7dae970bf8754933b50..a5963800b2a1b704a92eae79df2d27a2e0c6092f 100644 (file)
@@ -1891,12 +1891,18 @@ SeaStore::Shard::_remove(
       ObjectDataHandler(max_object_size),
       [&onode, this, &ctx](auto &objhandler)
     {
-      return objhandler.clear(
-        ObjectDataHandler::context_t{
-          *transaction_manager,
-          *ctx.transaction,
-          *onode,
-        });
+      auto fut = ObjectDataHandler::clone_iertr::now();
+      auto objctx = ObjectDataHandler::context_t{
+         *transaction_manager,
+         *ctx.transaction,
+         *onode,
+       };
+      if (onode->need_cow()) {
+       fut = objhandler.copy_on_write(objctx);
+      }
+      return fut.si_then([&objhandler, objctx] {
+       return objhandler.clear(objctx);
+      });
     });
   }).si_then([this, &ctx, &onode] {
     return onode_manager->erase_onode(*ctx.transaction, onode);
@@ -1934,14 +1940,18 @@ SeaStore::Shard::_write(
     std::move(_bl),
     ObjectDataHandler(max_object_size),
     [=, this, &ctx, &onode](auto &bl, auto &objhandler) {
-      return objhandler.write(
-        ObjectDataHandler::context_t{
-          *transaction_manager,
-          *ctx.transaction,
-          onode,
-        },
-        offset,
-        bl);
+      auto fut = ObjectDataHandler::clone_iertr::now();
+      auto objctx = ObjectDataHandler::context_t{
+         *transaction_manager,
+         *ctx.transaction,
+         onode,
+       };
+      if (onode.need_cow()) {
+       fut = objhandler.copy_on_write(objctx);
+      }
+      return fut.si_then([&objhandler, objctx, offset, &bl] {
+       return objhandler.write(objctx, offset, bl);
+      });
     });
 }
 
@@ -2023,14 +2033,18 @@ SeaStore::Shard::_zero(
   return seastar::do_with(
     ObjectDataHandler(max_object_size),
     [=, this, &ctx, &onode](auto &objhandler) {
-      return objhandler.zero(
-        ObjectDataHandler::context_t{
-          *transaction_manager,
-          *ctx.transaction,
-          onode,
-        },
-        offset,
-        len);
+    auto fut = ObjectDataHandler::clone_iertr::now();
+    auto objctx = ObjectDataHandler::context_t{
+       *transaction_manager,
+       *ctx.transaction,
+       onode,
+      };
+    if (onode.need_cow()) {
+      fut = objhandler.copy_on_write(objctx);
+    }
+    return fut.si_then([&objhandler, objctx, offset, len] {
+      return objhandler.zero(objctx, offset, len);
+    });
   });
 }
 
@@ -2079,13 +2093,18 @@ SeaStore::Shard::_truncate(
   return seastar::do_with(
     ObjectDataHandler(max_object_size),
     [=, this, &ctx, &onode](auto &objhandler) {
-    return objhandler.truncate(
-      ObjectDataHandler::context_t{
-        *transaction_manager,
-        *ctx.transaction,
-        onode
-      },
-      size);
+    auto fut = ObjectDataHandler::clone_iertr::now();
+    auto objctx = ObjectDataHandler::context_t{
+       *transaction_manager,
+       *ctx.transaction,
+       onode,
+      };
+    if (onode.need_cow()) {
+      fut = objhandler.copy_on_write(objctx);
+    }
+    return fut.si_then([&objhandler, objctx, size] {
+      return objhandler.truncate(objctx, size);
+    });
   });
 }