From: Radoslaw Zarzynski Date: Fri, 20 Nov 2020 15:42:36 +0000 (+0100) Subject: crimson: conditionally reload ObjectState if execution fails. X-Git-Tag: v16.1.0~429^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9945dd5876290d7287ef457953ffc0fbabcab05e;p=ceph.git crimson: conditionally reload ObjectState if execution fails. Signed-off-by: Radoslaw Zarzynski --- diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h index d7e6c6980154..42fcf61b8003 100644 --- a/src/crimson/osd/ops_executer.h +++ b/src/crimson/osd/ops_executer.h @@ -194,6 +194,10 @@ public: uint32_t get_pool_stripe_width() const { return pool_info.get_stripe_width(); } + + bool has_seen_write() const { + return num_write > 0; + } }; template diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 8893b6f43eb4..625f4322f5b3 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -615,21 +615,48 @@ osd_op_params_t&& PG::fill_op_params_bump_pg_version( seastar::future> PG::handle_failed_op( const std::error_code& e, ObjectContextRef obc, + const OpsExecuter& ox, const MOSDOp& m) const { + // Oops, an operation had failed. do_osd_ops() altogether with + // OpsExecuter already dropped the ObjectStore::Transaction if + // there was any. However, this is not enough to completely + // rollback as we gave OpsExecuter the very single copy of `obc` + // we maintain and we did it for both reading and writing. + // Now all modifications must be reverted. + // + // Let's just reload from the store. Evicting from the shared + // LRU would be tricky as next MOSDOp (the one at `get_obc` + // phase) could actually already finished the lookup. Fortunately, + // this is supposed to live on cold paths, so performance is not + // a concern -- simplicity wins. + // + // The conditional's purpose is to efficiently handle hot errors + // which may appear as a result of e.g. CEPH_OSD_OP_CMPXATTR or + // CEPH_OSD_OP_OMAP_CMP. These are read-like ops and clients + // typically append them before any write. If OpsExecuter hasn't + // seen any modifying operation, `obc` is supposed to be kept + // unchanged. assert(e.value() > 0); + const bool need_reload_obc = ox.has_seen_write(); logger().debug( - "{}: {} - object {} got error code {}, {}", + "{}: {} - object {} got error code {}, {}; need_reload_obc {}", __func__, m, obc->obs.oi.soid, e.value(), - e.message()); - auto reply = make_message( - &m, -e.value(), get_osdmap_epoch(), 0, false); - reply->set_enoent_reply_versions(peering_state.get_info().last_update, - peering_state.get_info().last_user_version); - return seastar::make_ready_future>(std::move(reply)); + e.message(), + need_reload_obc); + return (need_reload_obc ? reload_obc(*obc) + : load_obc_ertr::now() + ).safe_then([&e, &m, obc = std::move(obc), this] { + auto reply = make_message( + &m, -e.value(), get_osdmap_epoch(), 0, false); + reply->set_enoent_reply_versions( + peering_state.get_info().last_update, + peering_state.get_info().last_user_version); + return seastar::make_ready_future>(std::move(reply)); + }, load_obc_ertr::assert_all{ "can't live with object state messed up" }); } seastar::future> PG::do_osd_ops( @@ -689,7 +716,6 @@ seastar::future> PG::do_osd_ops( }).safe_then([this, m, obc, - ox_deleter = std::move(ox), rvec = op_info.allows_returnvec()] { // TODO: should stop at the first op which returns a negative retval, // cmpext uses it for returning the index of first unmatched byte @@ -708,12 +734,18 @@ seastar::future> PG::do_osd_ops( *m, obc->obs.oi.soid); return seastar::make_ready_future>(std::move(reply)); - }, OpsExecuter::osd_op_errorator::all_same_way([=] (const std::error_code& e) { - return handle_failed_op(e, obc, *m); - })).handle_exception_type([=](const crimson::osd::error& e) { + }, osd_op_errorator::all_same_way([ox = ox.get(), + m, + obc, + this] (const std::error_code& e) { + return handle_failed_op(e, std::move(obc), *ox, *m); + })).handle_exception_type([ox_deleter = std::move(ox), + m, + obc, + this] (const crimson::osd::error& e) { // we need this handler because throwing path which aren't errorated yet. logger().debug("encountered the legacy error handling path!"); - return handle_failed_op(e.code(), obc, *m); + return handle_failed_op(e.code(), std::move(obc), *ox_deleter, *m); }); } @@ -883,6 +915,29 @@ PG::load_head_obc(ObjectContextRef obc) }); } +PG::load_obc_ertr::future<> +PG::reload_obc(crimson::osd::ObjectContext& obc) const +{ + assert(obc.is_head()); + return backend->load_metadata(obc.get_oid()).safe_then([&obc](auto md) + -> load_obc_ertr::future<> { + logger().debug( + "{}: reloaded obs {} for {}", + __func__, + md->os.oi, + obc.get_oid()); + if (!md->ss) { + logger().error( + "{}: oid {} missing snapset", + __func__, + obc.get_oid()); + return crimson::ct_error::object_corrupted::make(); + } + obc.set_head_state(std::move(md->os), std::move(*(md->ss))); + return load_obc_ertr::now(); + }); +} + PG::load_obc_ertr::future<> PG::with_locked_obc(Ref &m, const OpInfo &op_info, Operation *op, PG::with_obc_func_t &&f) diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 12244dcd6531..2e061c4e1da0 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -34,8 +34,8 @@ #include "crimson/osd/pg_recovery_listener.h" #include "crimson/osd/recovery_backend.h" -class OSDMap; class MQuery; +class OSDMap; class PGBackend; class PGPeeringEvent; class osd_op_params_t; @@ -54,6 +54,7 @@ namespace crimson::os { namespace crimson::osd { class ClientRequest; +class OpsExecuter; class PG : public boost::intrusive_ref_counter< PG, @@ -500,6 +501,9 @@ public: load_obc_ertr::future load_head_obc(ObjectContextRef obc); + load_obc_ertr::future<> + reload_obc(crimson::osd::ObjectContext& obc) const; + public: using with_obc_func_t = std::function (ObjectContextRef)>; @@ -538,6 +542,7 @@ private: seastar::future> handle_failed_op( const std::error_code& e, ObjectContextRef obc, + const OpsExecuter& ox, const MOSDOp& m) const; seastar::future> do_osd_ops( Ref m,