From 55a6d3f95ca4a895d660d36cc60851115d2f9b03 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Tue, 23 Mar 2021 13:26:53 +0000 Subject: [PATCH] crimson/osd: introduce RollbackOrchestrator to OpsExecuter. If the execution of an `OSDOp` fails, we're left with potentially altered `ObjectContext`. We deal with that by reloading `obc` if there was any modification to it. To figure this out, `has_seen_write()` on `OpsExecuter` is being called. Unfortunately, the current impl. has following drawbacks: * `has_seen_write()` can be called after `std::move(ox).flush_...()` which is very inelegant; * it requires catching both `ObjectContext` and `OpsExecuter` while the latter already references the former; * there is no explicitly given reason in the header for justifying the presence of `has_seen_writes()`. Signed-off-by: Radoslaw Zarzynski --- src/crimson/osd/ops_executer.h | 46 ++++++++++++++++++++++++++++++++++ src/crimson/osd/pg.cc | 23 +++++++---------- 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h index 8a2f92cdcaa1f..a2927519ff83c 100644 --- a/src/crimson/osd/ops_executer.h +++ b/src/crimson/osd/ops_executer.h @@ -238,6 +238,18 @@ public: msg(std::in_place_type_t>{}, &msg) { } + template + struct RollbackHelper { + interruptible_future<> rollback_obc_if_modified(const std::error_code& e); + OpsExecuter& ox; + Func func; + }; + + template + RollbackHelper create_rollbacker(Func&& func) { + return {*this, std::forward(func)}; + } + interruptible_errorated_future execute_op(class OSDOp& osd_op); @@ -333,6 +345,40 @@ OpsExecuter::flush_changes_n_do_ops_effects(Ref pg, MutFunc&& mut_func) && } } +template +OpsExecuter::interruptible_future<> +OpsExecuter::RollbackHelper::rollback_obc_if_modified( + const std::error_code& e) +{ + // Oops, an operation had failed. do_osd_ops() altogether with + // OpsExecuter already dropped the ObjectStore::Transaction if + // there was any. However, this is not enough to completely + // rollback as we gave OpsExecuter the very single copy of `obc` + // we maintain and we did it for both reading and writing. + // Now all modifications must be reverted. + // + // Let's just reload from the store. Evicting from the shared + // LRU would be tricky as next MOSDOp (the one at `get_obc` + // phase) could actually already finished the lookup. Fortunately, + // this is supposed to live on cold paths, so performance is not + // a concern -- simplicity wins. + // + // The conditional's purpose is to efficiently handle hot errors + // which may appear as a result of e.g. CEPH_OSD_OP_CMPXATTR or + // CEPH_OSD_OP_OMAP_CMP. These are read-like ops and clients + // typically append them before any write. If OpsExecuter hasn't + // seen any modifying operation, `obc` is supposed to be kept + // unchanged. + const auto need_rollback = ox.has_seen_write(); + crimson::get_logger(ceph_subsys_osd).debug( + "{}: object {} got error {}, need_rollback={}", + __func__, + ox.obc->get_oid(), + e, + need_rollback); + return need_rollback ? func(*ox.obc) : interruptor::now(); +} + // PgOpsExecuter -- a class for executing ops targeting a certain PG. class PgOpsExecuter { template diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 23f8e55b6a478..82f15ee14be46 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -708,6 +708,10 @@ PG::do_osd_ops_iertr::future PG::do_osd_ops_execute( SuccessFunc&& success_func, FailureFunc&& failure_func) { + auto rollbacker = ox.create_rollbacker([this] (auto& obc) { + return reload_obc(obc).handle_error_interruptible( + load_obc_ertr::assert_all{"can't live with object state messed up"}); + }); return interruptor::do_for_each(ops, [m, &ox](OSDOp& osd_op) { logger().debug( "do_osd_ops_execute: {} - object {} - handling op {}", @@ -744,21 +748,12 @@ PG::do_osd_ops_iertr::future PG::do_osd_ops_execute( return do_osd_ops_iertr::future{crimson::ct_error::eagain::make()}; }); }), OpsExecuter::osd_op_errorator::all_same_way( - [&ox, m, obc, failure_func=std::move(failure_func), this] (const std::error_code& e) mutable { - const bool need_reload_obc = ox.has_seen_write(); - logger().debug( - "do_osd_ops_execute: {} - object {} got error {}, {}; need_reload_obc {}", - m, - obc->obs.oi.soid, - e.value(), - e.message(), - need_reload_obc); - return ( - need_reload_obc ? reload_obc(*obc) - : interruptor::make_interruptible(load_obc_ertr::now()) - ).safe_then_interruptible([&e, failure_func=std::move(failure_func)] { + [rollbacker, failure_func=std::move(failure_func), m] + (const std::error_code& e) mutable { + return rollbacker.rollback_obc_if_modified(e).then_interruptible( + [&e, failure_func=std::move(failure_func)] { return std::move(failure_func)(e); - }, load_obc_ertr::assert_all{ "can't live with object state messed up" }); + }); })); } -- 2.39.5