From ae84c12df31096bab8323ba521ba00f829adff23 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Tue, 4 Oct 2022 20:55:52 +0000 Subject: [PATCH] crimson/osd: add tmap support rbdv1 headers use tmap, it's somewhat easier to add support to crimson than to modify those tests to avoid rbdv1 on crimson. Fixes: https://tracker.ceph.com/issues/57759 Signed-off-by: Samuel Just --- src/crimson/CMakeLists.txt | 1 + src/crimson/common/tmap_helpers.cc | 131 +++++++++++++++++++++++ src/crimson/common/tmap_helpers.h | 40 +++++++ src/crimson/osd/ops_executer.cc | 17 ++- src/crimson/osd/pg_backend.cc | 165 ++++++++++++++++++++++++++--- src/crimson/osd/pg_backend.h | 33 ++++++ 6 files changed, 366 insertions(+), 21 deletions(-) create mode 100644 src/crimson/common/tmap_helpers.cc create mode 100644 src/crimson/common/tmap_helpers.h diff --git a/src/crimson/CMakeLists.txt b/src/crimson/CMakeLists.txt index c1dc34fc0b2..44c2ee93065 100644 --- a/src/crimson/CMakeLists.txt +++ b/src/crimson/CMakeLists.txt @@ -22,6 +22,7 @@ set(crimson_common_srcs common/logclient.cc common/operation.cc common/throttle.cc + common/tmap_helpers.cc common/tri_mutex.cc) # the specialized version of ceph-common, where diff --git a/src/crimson/common/tmap_helpers.cc b/src/crimson/common/tmap_helpers.cc new file mode 100644 index 00000000000..9c14ebc450e --- /dev/null +++ b/src/crimson/common/tmap_helpers.cc @@ -0,0 +1,131 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "crimson/common/tmap_helpers.h" + +#include "include/buffer.h" +#include "include/encoding.h" +#include "include/rados.h" + +namespace detail { + +#define decode_or_return(v, bp) \ + try { \ + ::decode(v, bp); \ + } catch (...) { \ + return -EINVAL; \ + } + +class TMapContents { + std::map keys; + bufferlist header; +public: + TMapContents() = default; + + int decode(bufferlist::const_iterator &bliter) { + keys.clear(); + header.clear(); + if (bliter.end()) { + return 0; + } + decode_or_return(header, bliter); + __u32 num_keys; + decode_or_return(num_keys, bliter); + for (; num_keys > 0; --num_keys) { + std::string key; + decode_or_return(key, bliter); + decode_or_return(keys[key], bliter); + } + return 0; + } + + bufferlist encode() { + bufferlist bl; + ::encode(header, bl); + ::encode(static_cast<__u32>(keys.size()), bl); + for (auto &[k, v]: keys) { + ::encode(k, bl); + ::encode(v, bl); + } + return bl; + } + + int update(bufferlist::const_iterator in) { + while (!in.end()) { + __u8 op; + decode_or_return(op, in); + + if (op == CEPH_OSD_TMAP_HDR) { + decode_or_return(header, in); + continue; + } + + std::string key; + decode_or_return(key, in); + + switch (op) { + case CEPH_OSD_TMAP_SET: { + decode_or_return(keys[key], in); + break; + } + case CEPH_OSD_TMAP_CREATE: { + if (keys.contains(key)) { + return -EEXIST; + } + decode_or_return(keys[key], in); + break; + } + case CEPH_OSD_TMAP_RM: { + auto kiter = keys.find(key); + if (kiter == keys.end()) { + return -ENOENT; + } + keys.erase(kiter); + break; + } + case CEPH_OSD_TMAP_RMSLOPPY: { + keys.erase(key); + break; + } + } + } + return 0; + } + + int put(bufferlist::const_iterator in) { + return 0; + } +}; + +} + +namespace crimson::common { + +using do_tmap_up_ret = tl::expected; +do_tmap_up_ret do_tmap_up(bufferlist::const_iterator in, bufferlist contents) +{ + detail::TMapContents tmap; + auto bliter = contents.cbegin(); + int r = tmap.decode(bliter); + if (r < 0) { + return tl::unexpected(r); + } + r = tmap.update(in); + if (r < 0) { + return tl::unexpected(r); + } + return tmap.encode(); +} + +using do_tmap_up_ret = tl::expected; +do_tmap_up_ret do_tmap_put(bufferlist::const_iterator in) +{ + detail::TMapContents tmap; + int r = tmap.decode(in); + if (r < 0) { + return tl::unexpected(r); + } + return tmap.encode(); +} + +} diff --git a/src/crimson/common/tmap_helpers.h b/src/crimson/common/tmap_helpers.h new file mode 100644 index 00000000000..446dbea2a0f --- /dev/null +++ b/src/crimson/common/tmap_helpers.h @@ -0,0 +1,40 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "include/expected.hpp" + +#include "include/buffer.h" +#include "include/encoding.h" + +namespace crimson::common { + +/** + * do_tmap_up + * + * Performs tmap update instructions encoded in buffer referenced by in. + * + * @param [in] in iterator to buffer containing encoded tmap update operations + * @param [in] contents current contents of object + * @return buffer containing new object contents, + * -EINVAL for decoding errors, + * -EEXIST for CEPH_OSD_TMAP_CREATE on a key that exists + * -ENOENT for CEPH_OSD_TMAP_RM on a key that does not exist + */ +using do_tmap_up_ret = tl::expected; +do_tmap_up_ret do_tmap_up(bufferlist::const_iterator in, bufferlist contents); + +/** + * do_tmap_put + * + * Validates passed buffer pointed to by in and returns resulting object buffer. + * + * @param [in] in iterator to buffer containing tmap encoding + * @return buffer containing validated tmap encoded by in + * -EINVAL for decoding errors, + */ +using do_tmap_up_ret = tl::expected; +do_tmap_up_ret do_tmap_put(bufferlist::const_iterator in); + +} diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index fdf9d3837d8..1685cdf8a29 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -609,12 +609,19 @@ OpsExecuter::do_execute_op(OSDOp& osd_op) return do_const_op([this, &osd_op] (/* const */auto& backend, const auto& os) { return backend.stat(os, osd_op, delta_stats); }); + + case CEPH_OSD_OP_TMAPPUT: + return do_write_op([this, &osd_op](auto& backend, auto& os, auto& txn) { + return backend.tmapput(os, osd_op, txn, delta_stats, *osd_op_params); + }); case CEPH_OSD_OP_TMAPUP: - // TODO: there was an effort to kill TMAP in ceph-osd. According to - // @dzafman this isn't possible yet. Maybe it could be accomplished - // before crimson's readiness and we'd luckily don't need to carry. - logger().info("crimson explicitly does not support CEPH_OSD_OP_TMAPUP"); - return dont_do_legacy_op(); + return do_write_op([this, &osd_op](auto& backend, auto& os, auto &txn) { + return backend.tmapup(os, osd_op, txn, delta_stats, *osd_op_params); + }); + case CEPH_OSD_OP_TMAPGET: + return do_read_op([this, &osd_op](auto& backend, const auto& os) { + return backend.tmapget(os, osd_op, delta_stats); + }); // OMAP case CEPH_OSD_OP_OMAPGETKEYS: diff --git a/src/crimson/osd/pg_backend.cc b/src/crimson/osd/pg_backend.cc index bdadabb8e19..8e02d3591c0 100644 --- a/src/crimson/osd/pg_backend.cc +++ b/src/crimson/osd/pg_backend.cc @@ -18,6 +18,7 @@ #include "common/Clock.h" #include "crimson/common/exception.h" +#include "crimson/common/tmap_helpers.h" #include "crimson/os/futurized_collection.h" #include "crimson/os/futurized_store.h" #include "crimson/osd/osd_operation.h" @@ -484,6 +485,39 @@ PGBackend::stat( return stat_errorator::now(); } +PGBackend::write_iertr::future<> PGBackend::_writefull( + ObjectState& os, + off_t truncate_size, + const bufferlist& bl, + ceph::os::Transaction& txn, + osd_op_params_t& osd_op_params, + object_stat_sum_t& delta_stats, + unsigned flags) +{ + const bool existing = maybe_create_new_object(os, txn, delta_stats); + if (existing && bl.length() < os.oi.size) { + + txn.truncate(coll->get_cid(), ghobject_t{os.oi.soid}, bl.length()); + truncate_update_size_and_usage(delta_stats, os.oi, truncate_size); + + osd_op_params.clean_regions.mark_data_region_dirty( + bl.length(), + os.oi.size - bl.length()); + } + if (bl.length()) { + txn.write( + coll->get_cid(), ghobject_t{os.oi.soid}, 0, bl.length(), + bl, flags); + update_size_and_usage( + delta_stats, os.oi, 0, + bl.length(), true); + osd_op_params.clean_regions.mark_data_region_dirty( + 0, + std::max((uint64_t)bl.length(), os.oi.size)); + } + return seastar::now(); +} + bool PGBackend::maybe_create_new_object( ObjectState& os, ceph::os::Transaction& txn, @@ -682,22 +716,14 @@ PGBackend::write_iertr::future<> PGBackend::writefull( return crimson::ct_error::file_too_large::make(); } - const bool existing = maybe_create_new_object(os, txn, delta_stats); - if (existing && op.extent.length < os.oi.size) { - txn.truncate(coll->get_cid(), ghobject_t{os.oi.soid}, op.extent.length); - truncate_update_size_and_usage(delta_stats, os.oi, op.extent.truncate_size); - osd_op_params.clean_regions.mark_data_region_dirty(op.extent.length, - os.oi.size - op.extent.length); - } - if (op.extent.length) { - txn.write(coll->get_cid(), ghobject_t{os.oi.soid}, 0, op.extent.length, - osd_op.indata, op.flags); - update_size_and_usage(delta_stats, os.oi, 0, - op.extent.length, true); - osd_op_params.clean_regions.mark_data_region_dirty(0, - std::max((uint64_t) op.extent.length, os.oi.size)); - } - return seastar::now(); + return _writefull( + os, + op.extent.truncate_size, + osd_op.indata, + txn, + osd_op_params, + delta_stats, + op.flags); } PGBackend::append_ierrorator::future<> PGBackend::append( @@ -1603,6 +1629,113 @@ PGBackend::fiemap( return store->fiemap(c, oid, off, len); } +PGBackend::write_iertr::future<> PGBackend::tmapput( + ObjectState& os, + const OSDOp& osd_op, + ceph::os::Transaction& txn, + object_stat_sum_t& delta_stats, + osd_op_params_t& osd_op_params) +{ + logger().debug("PGBackend::tmapput: {}", os.oi.soid); + auto ret = crimson::common::do_tmap_put(osd_op.indata.cbegin()); + if (!ret.has_value()) { + logger().debug("PGBackend::tmapup: {}, ret={}", os.oi.soid, ret.error()); + ceph_assert(ret.error() == -EINVAL); + return crimson::ct_error::invarg::make(); + } else { + auto bl = std::move(ret.value()); + return _writefull( + os, + bl.length(), + std::move(bl), + txn, + osd_op_params, + delta_stats, + 0); + } +} + +PGBackend::tmapup_iertr::future<> PGBackend::tmapup( + ObjectState& os, + const OSDOp& osd_op, + ceph::os::Transaction& txn, + object_stat_sum_t& delta_stats, + osd_op_params_t& osd_op_params) +{ + logger().debug("PGBackend::tmapup: {}", os.oi.soid); + return PGBackend::write_iertr::now( + ).si_then([this, &os] { + return _read(os.oi.soid, 0, os.oi.size, 0); + }).handle_error_interruptible( + crimson::ct_error::enoent::handle([](auto &) { + return seastar::make_ready_future(); + }), + PGBackend::write_iertr::pass_further{}, + crimson::ct_error::assert_all{"read error in mutate_object_contents"} + ).si_then([this, &os, &osd_op, &txn, + &delta_stats, &osd_op_params] + (auto &&bl) mutable -> PGBackend::tmapup_iertr::future<> { + auto result = crimson::common::do_tmap_up( + osd_op.indata.cbegin(), + std::move(bl)); + if (!result.has_value()) { + int ret = result.error(); + logger().debug("PGBackend::tmapup: {}, ret={}", os.oi.soid, ret); + switch (ret) { + case -EEXIST: + return crimson::ct_error::eexist::make(); + case -ENOENT: + return crimson::ct_error::enoent::make(); + case -EINVAL: + return crimson::ct_error::invarg::make(); + default: + ceph_assert(0 == "impossible error"); + return crimson::ct_error::invarg::make(); + } + } + + logger().debug( + "PGBackend::tmapup: {}, result.value.length()={}, ret=0", + os.oi.soid, result.value().length()); + return _writefull( + os, + result.value().length(), + result.value(), + txn, + osd_op_params, + delta_stats, + 0); + }); +} + +PGBackend::read_ierrorator::future<> PGBackend::tmapget( + const ObjectState& os, + OSDOp& osd_op, + object_stat_sum_t& delta_stats) +{ + logger().debug("PGBackend::tmapget: {}", os.oi.soid); + const auto& oi = os.oi; + logger().debug("PGBackend::tmapget: read {} 0~{}", oi.soid, oi.size); + if (!os.exists || os.oi.is_whiteout()) { + logger().debug("PGBackend::tmapget: {} DNE", os.oi.soid); + return crimson::ct_error::enoent::make(); + } + + return _read(oi.soid, 0, oi.size, 0).safe_then_interruptible_tuple( + [&delta_stats, &osd_op](auto&& bl) -> read_errorator::future<> { + logger().debug("PGBackend::tmapget: data length: {}", bl.length()); + osd_op.op.extent.length = bl.length(); + osd_op.rval = 0; + delta_stats.num_rd++; + delta_stats.num_rd_kb += shift_round_up(bl.length(), 10); + osd_op.outdata = std::move(bl); + return read_errorator::now(); + }, crimson::ct_error::input_output_error::handle([] { + return read_errorator::future<>{crimson::ct_error::object_corrupted::make()}; + }), + read_errorator::pass_further{}); +} + void PGBackend::on_activate_complete() { peering.reset(); } diff --git a/src/crimson/osd/pg_backend.h b/src/crimson/osd/pg_backend.h index 7f38cb03873..66b4add53c5 100644 --- a/src/crimson/osd/pg_backend.h +++ b/src/crimson/osd/pg_backend.h @@ -274,6 +274,31 @@ public: uint64_t off, uint64_t len); + write_iertr::future<> tmapput( + ObjectState& os, + const OSDOp& osd_op, + ceph::os::Transaction& trans, + object_stat_sum_t& delta_stats, + osd_op_params_t& osd_op_params); + + using tmapup_ertr = write_ertr::extend< + crimson::ct_error::enoent, + crimson::ct_error::eexist>; + using tmapup_iertr = ::crimson::interruptible::interruptible_errorator< + ::crimson::osd::IOInterruptCondition, + tmapup_ertr>; + tmapup_iertr::future<> tmapup( + ObjectState& os, + const OSDOp& osd_op, + ceph::os::Transaction& trans, + object_stat_sum_t& delta_stats, + osd_op_params_t& osd_op_params); + + read_ierrorator::future<> tmapget( + const ObjectState& os, + OSDOp& osd_op, + object_stat_sum_t& delta_stats); + // OMAP ll_read_ierrorator::future<> omap_get_keys( const ObjectState& os, @@ -372,6 +397,14 @@ private: size_t offset, size_t length, uint32_t flags) = 0; + write_iertr::future<> _writefull( + ObjectState& os, + off_t truncate_size, + const bufferlist& bl, + ceph::os::Transaction& txn, + osd_op_params_t& osd_op_params, + object_stat_sum_t& delta_stats, + unsigned flags); bool maybe_create_new_object(ObjectState& os, ceph::os::Transaction& txn, -- 2.39.5