// True when this mapping is not indirect and its value is zero.
bool is_zero_reserved() const {
  // an indirect mapping is never reported as zero-reserved
  if (is_indirect()) {
    return false;
  }
  return get_val().is_zero();
}
+ // true if the mapping corresponds to real data
bool is_real() const {
  // an indirect mapping is never reported as real
  if (is_indirect()) {
    return false;
  }
  // a non-indirect mapping is real only when its value is not zero
  return !get_val().is_zero();
}
});
}
+ObjectDataHandler::clone_ret // coroutine: clones the lba mappings of object_data into a region newly reserved for d_object_data
+ObjectDataHandler::do_clone(
+ context_t ctx,
+ object_data_t &object_data, // clone source; only its reserved base/len are read here
+ object_data_t &d_object_data, // clone destination; asserted to be null on entry
+ LBAMapping first_mapping, // forwarded (moved) to clone_range
+ bool updateref) // forwarded unchanged to clone_range
+{
+ LOG_PREFIX("ObjectDataHandler::do_clone"); // NOTE(review): argument is a quoted literal -- confirm LOG_PREFIX expects that rather than a bare name
+ assert(d_object_data.is_null());
+ auto old_base = object_data.get_reserved_data_base(); // source region, read before the destination is reserved
+ auto old_len = object_data.get_reserved_data_len();
+ auto mapping = co_await prepare_data_reservation( // reserve a region for the destination, requesting the source's reserved length
+ ctx, d_object_data, old_len);
+ ceph_assert(mapping.has_value()); // the reservation must have produced a mapping
+ DEBUGT("new obj reserve_data_base: {}, len 0x{:x}",
+ ctx.t,
+ d_object_data.get_reserved_data_base(),
+ d_object_data.get_reserved_data_len());
+ auto pos = co_await ctx.tm.remove(ctx.t, std::move(*mapping) // remove the mapping returned by the reservation; the result is handed to clone_range as pos
+ ).handle_error_interruptible(
+ clone_iertr::pass_further{},
+ crimson::ct_error::assert_all{"unexpected enoent"}
+ );
+ auto base = d_object_data.get_reserved_data_base(); // destination region as set up by the reservation above
+ auto len = d_object_data.get_reserved_data_len();
+ auto cr_ret = co_await ctx.tm.clone_range( // clone from old_base into base, offset 0, for len bytes of the destination reservation
+ ctx.t, old_base, base, 0, len, std::move(pos),
+ std::move(first_mapping), updateref);
+ if (cr_ret.shared_direct_mapping) { // NOTE(review): a clone_range hunk elsewhere in this change declares clone_iertr::future<bool> -- confirm this result type exposes shared_direct_mapping
+ ctx.onode.set_need_cow(ctx.t); // flag ctx.onode as needing cow when clone_range reports a shared direct mapping
+ }
+}
+
ObjectDataHandler::clone_ret ObjectDataHandler::clone(
context_t ctx)
{
- // the whole clone procedure can be seperated into the following steps:
- // 1. let clone onode(d_object_data) take the head onode's
- // object data base;
- // 2. reserve a new region in lba tree for the head onode;
- // 3. clone all extents of the clone onode, see transaction_manager.h
- // for the details of clone_pin;
- // 4. reserve the space between the head onode's size and its reservation
- // length.
return with_objects_data(
ctx,
[ctx, this](auto &object_data, auto &d_object_data) {
}
return ctx.tm.get_pin(ctx.t, object_data.get_reserved_data_base()
).si_then([this, &object_data, &d_object_data, ctx](auto mapping) {
- auto old_base = object_data.get_reserved_data_base();
- auto old_len = object_data.get_reserved_data_len();
- return prepare_data_reservation(
- ctx,
- d_object_data,
- object_data.get_reserved_data_len()
- ).si_then([&object_data, &d_object_data, ctx](auto mapping) {
- assert(!object_data.is_null());
- assert(mapping);
- LOG_PREFIX(ObjectDataHandler::clone);
- DEBUGT("cloned obj reserve_data_base: {}, len 0x{:x}",
- ctx.t,
- d_object_data.get_reserved_data_base(),
- d_object_data.get_reserved_data_len());
- return ctx.tm.remove(ctx.t, std::move(*mapping));
- }).si_then([mapping, &d_object_data, ctx](auto pos) mutable {
- auto base = d_object_data.get_reserved_data_base();
- auto len = d_object_data.get_reserved_data_len();
- return ctx.tm.clone_range(
- ctx.t, base, len, std::move(pos), std::move(mapping), true);
- }).si_then([ctx, &object_data, &d_object_data, this] {
- object_data.clear();
- return prepare_data_reservation(
- ctx,
- object_data,
- d_object_data.get_reserved_data_len()
- ).si_then([ctx, &object_data](auto mapping) {
- LOG_PREFIX("ObjectDataHandler::clone");
- DEBUGT("head obj reserve_data_base: {}, len 0x{:x}",
- ctx.t,
- object_data.get_reserved_data_base(),
- object_data.get_reserved_data_len());
- return ctx.tm.remove(ctx.t, std::move(*mapping));
- });
- }).si_then([ctx, &object_data, mapping](auto pos) mutable {
- auto base = object_data.get_reserved_data_base();
- auto len = object_data.get_reserved_data_len();
- return ctx.tm.clone_range(
- ctx.t, base, len, std::move(pos), std::move(mapping), false);
- }).si_then([ctx, mapping, old_base, old_len] {
- return ctx.tm.remove_mappings_in_range(
- ctx.t, old_base, old_len, std::move(mapping), {false, true}
- ).discard_result();
- });
+ ceph_assert(ctx.d_onode);
+ return do_clone(ctx, object_data, d_object_data, std::move(mapping), true);
}).handle_error_interruptible(
clone_iertr::pass_further{},
crimson::ct_error::assert_all{"unexpected enoent"}
clear_ret clear(context_t ctx);
/// Clone data of an Onode
+ /// Note that clone always assumes that ctx.onode
+ /// is a snap onode, so, for OP_CLONE, the caller of
+ /// this method should swap the layouts of the onode
+ /// and the dest_onode first.
using clone_iertr = base_iertr;
using clone_ret = clone_iertr::future<>;
clone_ret clone(context_t ctx);
std::optional<bufferlist> &&bl,
LBAMapping first_mapping);
+ /**
+ * do_clone
+ *
+ * Clone lba mappings from object_data to d_object_data.
+ * object_data must belong to ctx.onode, and d_object_data must belong to
+ * ctx.d_onode. This implementation is asymmetric and optimizes for (but
+ * does not require) the case that source is not further mutated.
+ *
+ * d_object_data must be null on entry (asserted). A region is reserved for
+ * it using object_data's reserved length before the mappings are cloned.
+ * ctx.onode is marked need_cow when clone_range reports a shared direct
+ * mapping.
+ *
+ * first_mapping is forwarded to clone_range; clone() passes the pin it
+ * looked up at object_data's reserved data base.
+ * updateref is forwarded unchanged to clone_range.
+ */
+ clone_ret do_clone(
+ context_t ctx,
+ object_data_t &object_data,
+ object_data_t &d_object_data,
+ LBAMapping first_mapping,
+ bool updateref);
+
/// Ensures object_data reserved region is prepared
write_iertr::future<std::optional<LBAMapping>>
prepare_data_reservation(
virtual const onode_layout_t &get_layout() const = 0;
virtual ~Onode() = default;
+ bool is_head() const { // forwards to hobj.is_head()
+ return hobj.is_head();
+ }
+ bool is_snap() const { // forwards to hobj.is_snap()
+ return hobj.is_snap();
+ }
+ bool need_cow() const { // reports the need_cow field of this onode's layout
+ return get_layout().need_cow;
+ }
virtual void update_onode_size(Transaction&, uint32_t) = 0;
virtual void update_omap_root(Transaction&, omap_root_t&) = 0;
virtual void update_log_root(Transaction&, omap_root_t&) = 0;
{
auto &object_size = onode.get_layout().size;
d_onode.update_onode_size(*ctx.transaction, object_size);
- return objHandler.clone(
- ObjectDataHandler::context_t{
- *transaction_manager,
- *ctx.transaction,
- onode,
- &d_onode});
+ if (onode.is_head()) { // OP_CLONE
+ assert(onode.is_head());
+ assert(d_onode.is_snap());
+ /* The most common usage of OP_CLONE is during a write operation.
+ * The osd will submit a transaction cloning HEAD to clone and
+ * then mutating HEAD. ObjectDataHandler::do_clone optimizes for
+ * the case where the *source* is not further mutated, so here we
+ * reverse the two onodes so that HEAD will be the target.
+ */
+ onode.swap_layout(*ctx.transaction, d_onode);
+ return objHandler.clone(
+ ObjectDataHandler::context_t{
+ *transaction_manager,
+ *ctx.transaction,
+ d_onode,
+ &onode});
+ } else { // OP_ROLLBACK
+ assert(d_onode.is_head());
+ return objHandler.clone(
+ ObjectDataHandler::context_t{
+ *transaction_manager,
+ *ctx.transaction,
+ onode,
+ &d_onode});
+ }
}).si_then([&ctx, &onode, &d_onode, this] {
return omaptree_clone(
- *ctx.transaction, omap_type_t::XATTR, onode, d_onode);
+ *ctx.transaction,
+ omap_type_t::XATTR,
+ onode.is_head() ? d_onode : onode,
+ onode.is_head() ? onode : d_onode);
}).si_then([&ctx, &onode, &d_onode, this] {
return omaptree_clone(
- *ctx.transaction, omap_type_t::OMAP, onode, d_onode);
+ *ctx.transaction,
+ omap_type_t::OMAP,
+ onode.is_head() ? d_onode : onode,
+ onode.is_head() ? onode : d_onode);
}).si_then([&ctx, &onode, &d_onode, this] {
return omaptree_clone(
- *ctx.transaction, omap_type_t::LOG, onode, d_onode);
+ *ctx.transaction,
+ omap_type_t::LOG,
+ onode.is_head() ? d_onode : onode,
+ onode.is_head() ? onode : d_onode);
});
}
reserved_data_len = 0;
}
};
+constexpr object_data_t get_null_object_data() { // builds an object_data_t from L_ADDR_NULL and 0
+ return object_data_t{L_ADDR_NULL, 0}; // presumably (reserved_data_base, reserved_data_len) -- confirm against object_data_t
+}
struct __attribute__((packed)) object_data_le_t {
laddr_le_t reserved_data_base = laddr_le_t(L_ADDR_NULL);
});
}
+ // clone the mappings in range base~len; returns true if there exist
+ // direct mappings that are cloned.
using clone_iertr = base_iertr;
- using clone_ret = clone_iertr::future<>;
+ using clone_ret = clone_iertr::future<bool>;
clone_ret clone_range(
Transaction &t,
laddr_t base,
std::move(pos),
std::move(mapping),
(extent_len_t)0,
- [&t, this, updateref, base, len](auto &pos, auto &mapping, auto &offset) {
+ false,
+ [&t, this, updateref, base, len]
+ (auto &pos, auto &mapping, auto &offset, auto &ret) {
return trans_intr::repeat(
- [&t, this, &pos, &mapping, &offset, updateref, base, len]()
+ [&t, this, &pos, &mapping, &offset, updateref, base, len, &ret]()
-> clone_iertr::future<seastar::stop_iteration> {
if (offset >= len) {
return clone_iertr::make_ready_future<
crimson::ct_error::assert_all{"unexpected error"}
);
}
+ if (mapping.is_real()) {
+ ret = true;
+ }
return clone_pin(
t, std::move(pos), std::move(mapping),
(base + offset).checked_to_laddr(), updateref
return seastar::stop_iteration::no;
});
});
+ }).si_then([&ret] {
+ return ret;
});
});
}