ObjectDataHandler::copy_on_write(
context_t ctx)
{
- return with_object_data(
+ return with_objects_data(
ctx,
- [ctx, this](auto &object_data) -> clone_ret {
+ [ctx, this](auto &object_data, auto &d_object_data) -> clone_ret
+ {
auto mapping = co_await ctx.tm.get_pin(
ctx.t, object_data.get_reserved_data_base()
).handle_error_interruptible(
clone_iertr::pass_further{},
crimson::ct_error::assert_all{"unexpected enoent"}
);
- object_data_t d_object_data = get_null_object_data();
co_await do_clone(ctx, object_data, d_object_data, mapping, false);
auto old_base = object_data.get_reserved_data_base();
auto old_len = object_data.get_reserved_data_len();
- object_data.update_reserved(
- d_object_data.get_reserved_data_base(),
- d_object_data.get_reserved_data_len());
- ctx.onode.unset_need_cow(ctx.t);
+ assert(ctx.d_onode->need_cow());
+ ctx.d_onode->unset_need_cow(ctx.t);
co_await ctx.tm.remove_mappings_in_range(
ctx.t, old_base, old_len, std::move(mapping), {false, true}
).handle_error_interruptible(
clone_iertr::pass_further{},
crimson::ct_error::assert_all{"unexpected enoent"}
).discard_result();
+
+ auto old_md_start = old_base.with_metadata().with_offset_by_blocks(0);
+ auto md_mapping = co_await ctx.tm.lower_bound_pin(ctx.t, old_md_start);
+ if (md_mapping.is_end() ||
+ md_mapping.get_key().get_clone_prefix() !=
+ old_md_start.get_clone_prefix()) {
+ co_return;
+ }
+ auto new_prefix = d_object_data
+ .get_reserved_data_base()
+ .get_clone_prefix()
+ .with_metadata();
+ auto md_dst_mapping = co_await ctx.tm.lower_bound_pin(ctx.t, new_prefix);
+ co_await ctx.tm.move_region(ctx.t, md_mapping, md_dst_mapping, new_prefix);
});
}
auto mapping = co_await prepare_data_reservation(
ctx, *ctx.d_onode, d_object_data, old_len);
ceph_assert(mapping.has_value());
+ assert(old_base.get_object_prefix() == mapping->get_key().get_object_prefix());
+ assert(old_base.get_clone_prefix() != mapping->get_key().get_clone_prefix());
DEBUGT("new obj reserve_data_base: {}, len 0x{:x}",
ctx.t,
d_object_data.get_reserved_data_base(),
namespace crimson::os::seastore::onode {
+struct FakeOnode final : Onode { // Onode that only serves a stored copy of a layout; most mutators abort
+ FakeOnode(const hobject_t &hobj, onode_layout_t layout)
+ : Onode(hobj), layout(layout) {} // layout is taken by value, so this object keeps its own copy
+
+ onode_layout_t layout{}; // the layout handed back by get_layout()
+
+ laddr_hint_t init_hint(extent_len_t block_size, bool is_metadata) const final {
+ ceph_abort("impossible"); // hint generation is not supported on this type
+ return LADDR_HINT_NULL; // NOTE(review): presumably only present to satisfy the return type
+ }
+ laddr_hint_t generate_clone_hint(
+ local_object_id_t object_id,
+ extent_len_t block_size,
+ bool is_metadata) const final {
+ ceph_abort("impossible"); // clone hints are not supported on this type either
+ return LADDR_HINT_NULL;
+ }
+
+ bool is_alive() const final { return true; } // unconditionally reports alive
+ const onode_layout_t &get_layout() const final {
+ return layout; // the copy captured at construction
+ }
+ void update_onode_size(Transaction &, uint32_t) final { // every update_*/clear_* override below aborts when called
+ ceph_abort("impossible");
+ }
+ void update_omap_root(Transaction &, omap_root_t &root) final {
+ ceph_abort("impossible");
+ }
+ void update_log_root(Transaction &, omap_root_t &root) final {
+ ceph_abort("impossible");
+ }
+ void update_xattr_root(Transaction &, omap_root_t &root) final {
+ ceph_abort("impossible");
+ }
+ void update_object_data(Transaction &, object_data_t &data) final {
+ ceph_abort("impossible");
+ }
+ void update_object_info(Transaction &, ceph::bufferlist &) final {
+ ceph_abort("impossible");
+ }
+ void update_snapset(Transaction &, ceph::bufferlist &) final {
+ ceph_abort("impossible");
+ }
+ void clear_object_info(Transaction &) final { ceph_abort("impossible"); }
+ void clear_snapset(Transaction &) final { ceph_abort("impossible"); }
+ void set_need_cow(Transaction &) final {} // accepted, but has no effect
+ void unset_need_cow(Transaction &) final {} // accepted, but has no effect
+ void swap_layout(Transaction &, Onode &o) final { ceph_abort("impossible"); }
+ boost::intrusive_ptr<Onode> offload_data_and_md(Transaction &t) final {
+ ceph_abort("impossible"); // a FakeOnode cannot itself be offloaded
+ return nullptr;
+ }
+};
+
struct FLTreeOnode final : Onode, Value {
static constexpr tree_conf_t TREE_CONF = {
value_magic_t::ONODE,
_swap_layout(t, static_cast<FLTreeOnode&>(onode));
}
+  /**
+   * offload_data_and_md
+   *
+   * Copies this onode's current layout into a newly allocated FakeOnode,
+   * then overwrites this onode's object data with {L_ADDR_NULL, 0} and
+   * its omap, xattr and log roots with default-constructed roots of the
+   * matching type.
+   *
+   * @param t transaction passed to the update_*() calls on this onode
+   * @return the FakeOnode holding the layout copied before the updates
+   *
+   * Must not be called on a deleted onode (asserted).
+   */
+  boost::intrusive_ptr<Onode> offload_data_and_md(Transaction & t) final {
+    assert(status != status_t::DELETED);
+    // Copy the layout before any update_*() call below runs, and own the
+    // copy through a smart pointer right away so that it is released if
+    // one of those calls throws.
+    boost::intrusive_ptr<Onode> fake_onode(
+      new FakeOnode(hobj, get_layout()));
+    object_data_t data{L_ADDR_NULL, 0};
+    update_object_data(t, data);
+    // One default-constructed root is reused for all three updates; only
+    // its type tag changes between calls.
+    omap_root_t root;
+    root.type = omap_type_t::OMAP;
+    update_omap_root(t, root);
+    root.type = omap_type_t::XATTR;
+    update_xattr_root(t, root);
+    root.type = omap_type_t::LOG;
+    update_log_root(t, root);
+    return fake_onode;
+  }
+
void _swap_layout(Transaction &t, FLTreeOnode &other) {
assert(status != status_t::DELETED);
assert(other.status != status_t::DELETED);
);
}
+namespace {
+/**
+ * rename_onode_omap_metadata
+ *
+ * Reads the omap, xattr and log roots of src, rebases every non-null
+ * root from src's clone prefix onto dst's clone prefix, and stores the
+ * three resulting roots in dst.
+ *
+ * @param t   transaction passed to the update_*_root() calls on dst
+ * @param src onode whose roots and clone prefix are read
+ * @param dst onode that supplies the target clone prefix and metadata
+ *            hint and receives the three roots
+ *
+ * Both onodes must have a clone prefix; this is asserted.
+ */
+void rename_onode_omap_metadata(
+  Transaction &t, Onode &src, Onode &dst)
+{
+  // The prefix is dereferenced, so check that one is present first.
+  auto get_prefix = [](Onode &onode) {
+    auto p = onode.get_clone_prefix();
+    assert(p);
+    return *p;
+  };
+  auto src_prefix = get_prefix(src);
+  auto dst_prefix = get_prefix(dst);
+
+  // Returns src's root of the given type, moved so that it sits at the
+  // same byte distance from dst_prefix as it did from src_prefix. A null
+  // root is returned unchanged.
+  auto rename_root = [&src, &dst, src_prefix, dst_prefix](omap_type_t type) {
+    auto root = src.get_root(type).get(dst.get_metadata_hint());
+    if (root.is_null()) {
+      return root;
+    }
+    auto offset = root.addr.get_byte_distance<loffset_t>(src_prefix);
+    root.update(
+      (dst_prefix + offset).checked_to_laddr(),
+      root.depth, dst.get_metadata_hint(), type);
+    return root;
+  };
+
+  // Compute all three roots before writing any of them to dst.
+  auto omap_root = rename_root(omap_type_t::OMAP);
+  auto xattr_root = rename_root(omap_type_t::XATTR);
+  auto log_root = rename_root(omap_type_t::LOG);
+
+  dst.update_omap_root(t, omap_root);
+  dst.update_xattr_root(t, xattr_root);
+  dst.update_log_root(t, log_root);
+}
+}
+
SeaStore::Shard::tm_ret
SeaStore::Shard::_rename(
internal_context_t &ctx,
return objHanlder.rename(ObjectDataHandler::context_t{
*transaction_manager, *ctx.transaction, *onode, d_onode.get()
}).si_then([&ctx, &onode, &d_onode] {
- auto get_prefix = [](Onode &onode) {
- auto p = onode.get_clone_prefix();
- assert(p);
- return *p;
- };
- auto src_prefix = get_prefix(*onode);
- auto dst_prefix = get_prefix(*d_onode);
-
- auto rename_omap_root = [&](omap_type_t type) {
- auto root = onode->get_root(type).get(d_onode->get_metadata_hint());
- if (root.is_null()) {
- return root;
- }
- auto offset = root.addr.get_byte_distance<loffset_t>(src_prefix);
- root.update(
- (dst_prefix + offset).checked_to_laddr(),
- root.depth, d_onode->get_metadata_hint(), type);
- return root;
- };
-
auto olayout = onode->get_layout();
uint32_t size = olayout.size;
- auto omap_root = rename_omap_root(omap_type_t::OMAP);
- auto xattr_root = rename_omap_root(omap_type_t::XATTR);
- auto log_root = rename_omap_root(omap_type_t::LOG);
auto oi_bl = ceph::bufferlist::static_from_mem(
&olayout.oi[0],
(uint32_t)olayout.oi_size);
(uint32_t)olayout.ss_size);
d_onode->update_onode_size(*ctx.transaction, size);
- d_onode->update_omap_root(*ctx.transaction, omap_root);
- d_onode->update_xattr_root(*ctx.transaction, xattr_root);
- d_onode->update_log_root(*ctx.transaction, log_root);
d_onode->update_object_info(*ctx.transaction, oi_bl);
d_onode->update_snapset(*ctx.transaction, ss_bl);
+ rename_onode_omap_metadata(*ctx.transaction, *onode, *d_onode);
});
}).si_then([this, &ctx, &onode] {
return onode_manager->erase_onode(
ObjectDataHandler(max_object_size),
[&onode, this, &ctx](auto &objhandler)
{
- auto fut = ObjectDataHandler::clone_iertr::now();
- auto objctx = ObjectDataHandler::context_t{
- *transaction_manager,
- *ctx.transaction,
- *onode,
- };
- if (onode->need_cow()) {
- fut = objhandler.copy_on_write(objctx);
- }
- return fut.si_then([&objhandler, objctx] {
- return objhandler.clear(objctx);
+ return _maybe_copy_on_write(ctx, *onode, objhandler
+ ).si_then([&onode, this, &ctx, &objhandler] {
+ return objhandler.clear(
+ ObjectDataHandler::context_t{
+ *transaction_manager,
+ *ctx.transaction,
+ *onode,
+ });
});
});
}).si_then([this, &ctx, &onode] {
return seastar::do_with(
std::move(_bl),
ObjectDataHandler(max_object_size),
- [=, this, &ctx, &onode](auto &bl, auto &objhandler) {
- auto fut = ObjectDataHandler::clone_iertr::now();
- auto objctx = ObjectDataHandler::context_t{
+ [=, this, &ctx, &onode](auto &bl, auto &objhandler)
+ {
+ return _maybe_copy_on_write(ctx, onode, objhandler
+ ).si_then([&ctx, &onode, &objhandler, offset, &bl, this] {
+ return objhandler.write(
+ ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,
onode,
- };
- if (onode.need_cow()) {
- fut = objhandler.copy_on_write(objctx);
- }
- return fut.si_then([&objhandler, objctx, offset, &bl] {
- return objhandler.write(objctx, offset, bl);
- });
+ },
+ offset,
+ bl);
});
+ });
}
SeaStore::Shard::tm_ret
* the case where the *source* is not further mutated, so here we
* reverse the two onodes so that HEAD will be the target.
*/
+ auto id = onode.get_layout()
+ .object_data
+ .get()
+ .get_reserved_data_base()
+ .get_local_object_id();
onode.swap_layout(*ctx.transaction, d_onode);
+ onode.set_sibling_object_id(id);
return objHandler.clone(
ObjectDataHandler::context_t{
*transaction_manager,
});
}
+/**
+ * _maybe_copy_on_write
+ *
+ * Does nothing when onode.need_cow() is false. Otherwise:
+ *  1. calls onode.offload_data_and_md() and keeps the returned onode,
+ *  2. sets onode's sibling object id to the local object id of the
+ *     returned onode's clone prefix,
+ *  3. runs handler.copy_on_write() with the returned onode as the
+ *     context's onode and `onode` as its d_onode,
+ *  4. calls rename_onode_omap_metadata() from the returned onode to
+ *     `onode`.
+ *
+ * @param ctx     supplies the transaction used for every step
+ * @param onode   onode to check; must stay valid until the returned
+ *                future resolves, since this is a coroutine holding a
+ *                reference to it
+ * @param handler object data handler used for the copy-on-write
+ */
+SeaStore::Shard::tm_ret
+SeaStore::Shard::_maybe_copy_on_write(
+  internal_context_t &ctx,
+  Onode &onode,
+  ObjectDataHandler &handler)
+{
+  if (!onode.need_cow()) {
+    co_return;
+  }
+  auto fake_onode = onode.offload_data_and_md(*ctx.transaction);
+  // The clone prefix is dereferenced, so check that one is present first.
+  auto old_prefix = fake_onode->get_clone_prefix();
+  assert(old_prefix);
+  onode.set_sibling_object_id(old_prefix->get_local_object_id());
+  co_await handler.copy_on_write(
+    ObjectDataHandler::context_t{
+      *transaction_manager,
+      *ctx.transaction,
+      *fake_onode,
+      &onode
+    });
+  rename_onode_omap_metadata(*ctx.transaction, *fake_onode, onode);
+}
+
SeaStore::Shard::tm_ret
SeaStore::Shard::_clone_range(
internal_context_t &ctx,
std::max<uint64_t>(offset + len, object_size));
return seastar::do_with(
ObjectDataHandler(max_object_size),
- [=, this, &ctx, &onode](auto &objhandler) {
- auto fut = ObjectDataHandler::clone_iertr::now();
- auto objctx = ObjectDataHandler::context_t{
- *transaction_manager,
- *ctx.transaction,
- onode,
- };
- if (onode.need_cow()) {
- fut = objhandler.copy_on_write(objctx);
- }
- return fut.si_then([&objhandler, objctx, offset, len] {
- return objhandler.zero(objctx, offset, len);
+ [=, this, &ctx, &onode](auto &objhandler)
+ {
+ return _maybe_copy_on_write(ctx, onode, objhandler
+ ).si_then([this, &ctx, &onode, &objhandler, offset, len] {
+ return objhandler.zero(
+ ObjectDataHandler::context_t{
+ *transaction_manager,
+ *ctx.transaction,
+ onode,
+ },
+ offset,
+ len);
});
});
}
onode.update_onode_size(*ctx.transaction, size);
return seastar::do_with(
ObjectDataHandler(max_object_size),
- [=, this, &ctx, &onode](auto &objhandler) {
- auto fut = ObjectDataHandler::clone_iertr::now();
- auto objctx = ObjectDataHandler::context_t{
- *transaction_manager,
- *ctx.transaction,
- onode,
- };
- if (onode.need_cow()) {
- fut = objhandler.copy_on_write(objctx);
- }
- return fut.si_then([&objhandler, objctx, size] {
- return objhandler.truncate(objctx, size);
+ [=, this, &ctx, &onode](auto &objhandler)
+ {
+ return _maybe_copy_on_write(ctx, onode, objhandler
+ ).si_then([this, &ctx, &onode, &objhandler, size] {
+ return objhandler.truncate(
+ ObjectDataHandler::context_t{
+ *transaction_manager,
+ *ctx.transaction,
+ onode,
+ },
+ size);
});
});
}