From: Xuehan Xu Date: Sun, 8 Sep 2024 05:27:07 +0000 (+0800) Subject: crimson/osd/pg: rollback ops by copying obc beforehand and recover after X-Git-Tag: v20.0.0~1041^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fc41513d4e990c3a7cba38f100e7e6275dfdf849;p=ceph.git crimson/osd/pg: rollback ops by copying obc beforehand and recover after errors Fixes: https://tracker.ceph.com/issues/63844 Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index 4148e3b592c0..e17af91e3ade 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -76,6 +76,11 @@ public: ObjectContext(hobject_t hoid) : lock(hoid), obs(std::move(hoid)) {} + void update_from(const ObjectContext &obc) { + obs = obc.obs; + ssc = obc.ssc; + } + const hobject_t &get_oid() const { return obs.oi.soid; } diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h index 0448be407d85..623e3d90d56a 100644 --- a/src/crimson/osd/ops_executer.h +++ b/src/crimson/osd/ops_executer.h @@ -619,7 +619,10 @@ OpsExecuter::RollbackHelper::rollback_obc_if_modified( ox->obc->get_oid(), e, need_rollback); - return need_rollback ? func(*ox->obc) : interruptor::now(); + if (need_rollback) { + func(ox->obc); + } + return interruptor::now(); } // PgOpsExecuter -- a class for executing ops targeting a certain PG. diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index b4a0074b382f..6038dd655a40 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -963,6 +963,15 @@ PG::BackgroundProcessLock::lock() noexcept return interruptor::make_interruptible(mutex.lock()); } +// We may need to rollback the ObjectContext on failed op execution. +// Copy the current obc before mutating it in order to recover on failures. +ObjectContextRef duplicate_obc(const ObjectContextRef &obc) { + ObjectContextRef object_context = new ObjectContext(obc->obs.oi.soid); + object_context->obs = obc->obs; + object_context->ssc = new SnapSetContext(*obc->ssc); + return object_context; +} + template PG::do_osd_ops_iertr::future> PG::do_osd_ops_execute( @@ -975,9 +984,9 @@ PG::do_osd_ops_execute( FailureFunc&& failure_func) { assert(ox); - auto rollbacker = ox->create_rollbacker([this] (auto& obc) { - return obc_loader.reload_obc(obc).handle_error_interruptible( - load_obc_ertr::assert_all{"can't live with object state messed up"}); + auto rollbacker = ox->create_rollbacker( + [object_context=duplicate_obc(obc)] (auto& obc) mutable { + obc->update_from(*object_context); }); auto failure_func_ptr = seastar::make_lw_shared(std::move(failure_func)); return interruptor::do_for_each(ops, [ox](OSDOp& osd_op) {