From 6f3a43c060211a078d6844a8181ae66e8fea832d Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Mon, 8 Jun 2020 17:58:56 +0800 Subject: [PATCH] crimson: block io for object recovery Signed-off-by: Xuehan Xu --- src/crimson/osd/object_context.h | 9 +++-- .../osd/osd_operations/background_recovery.cc | 29 +++++++++++++++ .../osd/osd_operations/background_recovery.h | 36 ++++++++++++++----- .../osd/osd_operations/client_request.cc | 32 ++++++++++++----- .../osd/osd_operations/client_request.h | 7 ++-- src/crimson/osd/pg.h | 5 +-- src/crimson/osd/pg_recovery.cc | 2 +- src/crimson/osd/pg_recovery.h | 1 + src/crimson/osd/pg_recovery_listener.h | 2 +- .../osd/replicated_recovery_backend.cc | 21 +++++++---- src/osd/MissingLoc.cc | 5 +-- src/osd/MissingLoc.h | 3 +- 12 files changed, 116 insertions(+), 36 deletions(-) diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index 6ff49dedb3f..2a5461c4d90 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -203,8 +203,13 @@ public: void drop_read() { return put_lock_type(RWState::RWREAD); } - bool get_recovery_read() { - return rwstate.get_recovery_read(); + seastar::future get_recovery_read(bool can_wait = false) { + if (!can_wait) { + return seastar::make_ready_future(rwstate.get_recovery_read()); + } + return with_queue([this] { + return rwstate.get_recovery_read(); + }).then([] {return seastar::make_ready_future(true); }); } void drop_recovery_read() { ceph_assert(rwstate.recovery_read_marker); diff --git a/src/crimson/osd/osd_operations/background_recovery.cc b/src/crimson/osd/osd_operations/background_recovery.cc index 5148d46a65c..29b2418c1d6 100644 --- a/src/crimson/osd/osd_operations/background_recovery.cc +++ b/src/crimson/osd/osd_operations/background_recovery.cc @@ -68,4 +68,33 @@ seastar::future<> BackgroundRecovery::start() }); } +seastar::future UrgentRecovery::do_recovery() +{ + if (!pg->has_reset_since(epoch_started)) { + auto futopt = pg->get_recovery_handler()->recover_missing(soid, need); + assert(futopt); + return with_blocking_future(std::move(*futopt)).then([] { + return seastar::make_ready_future(false); + }); + } + return seastar::make_ready_future(false); +} + +void UrgentRecovery::print(std::ostream &lhs) const +{ + lhs << "UrgentRecovery(" << pg->get_pgid() << ", " + << soid << ", v" << need << ")"; +} + +void UrgentRecovery::dump_detail(Formatter *f) const +{ + f->dump_stream("pgid") << pg->get_pgid(); + f->open_object_section("recovery_detail"); + { + f->dump_stream("oid") << soid; + f->dump_stream("version") << need; + } + f->close_section(); +} + } diff --git a/src/crimson/osd/osd_operations/background_recovery.h b/src/crimson/osd/osd_operations/background_recovery.h index 65ea5220c9b..1890d22be97 100644 --- a/src/crimson/osd/osd_operations/background_recovery.h +++ b/src/crimson/osd/osd_operations/background_recovery.h @@ -13,7 +13,7 @@ namespace crimson::osd { class PG; class ShardServices; -class BackgroundRecovery final : public OperationT { +class BackgroundRecovery : public OperationT { public: static constexpr OperationTypeCode type = OperationTypeCode::background_recovery; @@ -23,24 +23,42 @@ public: epoch_t epoch_started, crimson::osd::scheduler::scheduler_class_t scheduler_class); - void print(std::ostream &) const final; - void dump_detail(Formatter *f) const final; + virtual void print(std::ostream &) const; + virtual void dump_detail(Formatter *f) const; seastar::future<> start(); -private: +protected: Ref pg; ShardServices &ss; epoch_t epoch_started; crimson::osd::scheduler::scheduler_class_t scheduler_class; - - auto get_scheduler_params() const { + auto get_scheduler_params(crimson::osd::scheduler::cost_t cost = 1, + crimson::osd::scheduler::client_t owner = 0) const { return crimson::osd::scheduler::params_t{ - 1, // cost - 0, // owner + cost, // cost + owner, // owner scheduler_class }; } + virtual seastar::future do_recovery(); +}; - seastar::future do_recovery(); +class UrgentRecovery final : public BackgroundRecovery { +public: + UrgentRecovery( + const hobject_t& soid, + const eversion_t& need, + Ref pg, + ShardServices& ss, + epoch_t epoch_started, + crimson::osd::scheduler::scheduler_class_t scheduler_class) + : BackgroundRecovery{pg, ss, epoch_started, scheduler_class}, + soid{soid}, need(need) {} + void print(std::ostream&) const final; + void dump_detail(Formatter* f) const final; +private: + const hobject_t soid; + const eversion_t need; + seastar::future do_recovery() override; }; } diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index f31621a4867..8f8075f4efe 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -68,7 +68,7 @@ seastar::future<> ClientRequest::start() return with_blocking_future(osd.wait_for_pg(m->get_spg())); }).then([this, opref=std::move(opref)](Ref pgref) { return seastar::do_with( - std::move(pgref), std::move(opref), [this](auto pgref, auto opref) { + std::move(pgref), std::move(opref), [this](auto& pgref, auto& opref) { PG &pg = *pgref; return with_blocking_future( handle.enter(pp(pg).await_map) @@ -80,14 +80,14 @@ seastar::future<> ClientRequest::start() handle.enter(pp(pg).wait_for_active)); }).then([this, &pg]() mutable { return with_blocking_future(pg.wait_for_active_blocker.wait()); - }).then([this, &pg]() mutable { + }).then([this, &pgref]() mutable { if (m->finish_decode()) { m->clear_payload(); } if (is_pg_op()) { - return process_pg_op(pg); + return process_pg_op(pgref); } else { - return process_op(pg); + return process_op(pgref); } }); }); @@ -96,20 +96,33 @@ seastar::future<> ClientRequest::start() } seastar::future<> ClientRequest::process_pg_op( - PG &pg) + Ref &pg) { - return pg.do_pg_ops(m) + return pg->do_pg_ops(m) .then([this](Ref reply) { return conn->send(reply); }); } seastar::future<> ClientRequest::process_op( - PG &pg) + Ref &pgref) { + PG& pg = *pgref; return with_blocking_future( - handle.enter(pp(pg).get_obc) - ).then([this, &pg]() { + handle.enter(pp(pg).recover_missing) + ).then([this, &pg, pgref=std::move(pgref)] { + eversion_t ver; + const hobject_t& soid = m->get_hobj(); + if (pg.is_unreadable_object(soid, &ver)) { + auto [op, fut] = osd.get_shard_services().start_operation( + soid, ver, pgref, osd.get_shard_services(), m->get_min_epoch(), + crimson::osd::scheduler::scheduler_class_t::immediate); + return std::move(fut); + } + return seastar::now(); + }).then([this, &pg] { + return with_blocking_future(handle.enter(pp(pg).get_obc)); + }).then([this, &pg]() { op_info.set_from_op(&*m, *pg.get_osdmap()); return pg.with_locked_obc( m, @@ -130,4 +143,5 @@ seastar::future<> ClientRequest::process_op( return seastar::now(); })); } + } diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h index de0e914d8e6..d2fdbe926df 100644 --- a/src/crimson/osd/osd_operations/client_request.h +++ b/src/crimson/osd/osd_operations/client_request.h @@ -37,6 +37,9 @@ public: OrderedPipelinePhase wait_for_active = { "ClientRequest::PGPipeline::wait_for_active" }; + OrderedPipelinePhase recover_missing = { + "ClientRequest::PGPipeline::recover_missing" + }; OrderedPipelinePhase get_obc = { "ClientRequest::PGPipeline::get_obc" }; @@ -58,9 +61,9 @@ public: private: seastar::future<> process_pg_op( - PG &pg); + Ref &pg); seastar::future<> process_op( - PG &pg); + Ref &pg); bool is_pg_op() const; ConnectionPipeline &cp(); diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index a4e2650c994..d4beaf9a52b 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -661,10 +661,11 @@ private: bool is_missing_object(const hobject_t& soid) const { return peering_state.get_pg_log().get_missing().get_items().count(soid); } - bool is_unreadable_object(const hobject_t &oid) const final { + bool is_unreadable_object(const hobject_t &oid, + eversion_t* v = 0) const final { return is_missing_object(oid) || !peering_state.get_missing_loc().readable_with_acting( - oid, get_actingset()); + oid, get_actingset(), v); } const set &get_actingset() const { return peering_state.get_actingset(); diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc index f040c7e3219..a77f67ccfe1 100644 --- a/src/crimson/osd/pg_recovery.cc +++ b/src/crimson/osd/pg_recovery.cc @@ -332,7 +332,7 @@ void PGRecovery::on_local_recover( obc->obs.oi = recovery_info.oi; // obc is loaded the excl lock obc->put_lock_type(RWState::RWEXCL); - assert(obc->get_recovery_read()); + assert(obc->get_recovery_read().get0()); } if (!pg->is_unreadable_object(soid)) { pg->get_recovery_backend()->get_recovering(soid).set_readable(); diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h index d5309550e4a..e264b76614a 100644 --- a/src/crimson/osd/pg_recovery.h +++ b/src/crimson/osd/pg_recovery.h @@ -69,6 +69,7 @@ private: void _committed_pushed_object(epoch_t epoch, eversion_t last_complete); friend class ReplicatedRecoveryBackend; + friend class crimson::osd::UrgentRecovery; seastar::future<> handle_pull(Ref m); seastar::future<> handle_push(Ref m); seastar::future<> handle_push_reply(Ref m); diff --git a/src/crimson/osd/pg_recovery_listener.h b/src/crimson/osd/pg_recovery_listener.h index 29e91e403ef..f9df92a4aaf 100644 --- a/src/crimson/osd/pg_recovery_listener.h +++ b/src/crimson/osd/pg_recovery_listener.h @@ -29,7 +29,7 @@ public: virtual const pg_shard_t& get_pg_whoami() const = 0; virtual const spg_t& get_pgid() const = 0; virtual RecoveryBackend* get_recovery_backend() = 0; - virtual bool is_unreadable_object(const hobject_t&) const = 0; + virtual bool is_unreadable_object(const hobject_t&, eversion_t* v = 0) const = 0; virtual bool has_reset_since(epoch_t) const = 0; virtual std::vector get_replica_recovery_order() const = 0; virtual seastar::future<> stop() = 0; diff --git a/src/crimson/osd/replicated_recovery_backend.cc b/src/crimson/osd/replicated_recovery_backend.cc index 013e2cd99da..30df1da9928 100644 --- a/src/crimson/osd/replicated_recovery_backend.cc +++ b/src/crimson/osd/replicated_recovery_backend.cc @@ -45,16 +45,18 @@ seastar::future<> ReplicatedRecoveryBackend::recover_object( std::move(msg), pg.get_osdmap_epoch()).then( [&recovery_waiter] { - return recovery_waiter.wait_for_pull(); + return recovery_waiter.wait_for_pull().then([] { + return seastar::make_ready_future(true); + }); }); } else { - return seastar::make_ready_future<>(); + return seastar::make_ready_future(false); } - }().then([this, &pops, &shards, soid, need, &recovery_waiter]() mutable { - return [this, &recovery_waiter, soid] { + }().then([this, &pops, &shards, soid, need, &recovery_waiter](bool pulled) mutable { + return [this, &recovery_waiter, soid, pulled] { if (!recovery_waiter.obc) { return pg.get_or_load_head_obc(soid).safe_then( - [&recovery_waiter](auto p) { + [&recovery_waiter, pulled](auto p) { auto& [obc, existed] = p; logger().debug("recover_object: loaded obc: {}", obc->obs.oi.soid); recovery_waiter.obc = obc; @@ -62,7 +64,12 @@ seastar::future<> ReplicatedRecoveryBackend::recover_object( // obc is loaded with excl lock recovery_waiter.obc->put_lock_type(RWState::RWEXCL); } - assert(recovery_waiter.obc->get_recovery_read()); + bool got = recovery_waiter.obc->get_recovery_read().get0(); + assert(pulled ? got : 1); + if (!got) { + return recovery_waiter.obc->get_recovery_read(true) + .then([](bool) { return seastar::now(); }); + } return seastar::make_ready_future<>(); }, crimson::osd::PG::load_obc_ertr::all_same_way( [this, &recovery_waiter, soid](const std::error_code& e) { @@ -73,7 +80,7 @@ seastar::future<> ReplicatedRecoveryBackend::recover_object( recovery_waiter.obc = obc; // obc is loaded with excl lock recovery_waiter.obc->put_lock_type(RWState::RWEXCL); - assert(recovery_waiter.obc->get_recovery_read()); + assert(recovery_waiter.obc->get_recovery_read().get0()); return seastar::make_ready_future<>(); }) ); diff --git a/src/osd/MissingLoc.cc b/src/osd/MissingLoc.cc index 2682e57c26f..d45220a824a 100644 --- a/src/osd/MissingLoc.cc +++ b/src/osd/MissingLoc.cc @@ -12,8 +12,9 @@ using std::set; bool MissingLoc::readable_with_acting( const hobject_t &hoid, - const set &acting) const { - if (!needs_recovery(hoid)) + const set &acting, + eversion_t* v) const { + if (!needs_recovery(hoid, v)) return true; if (is_deleted(hoid)) return false; diff --git a/src/osd/MissingLoc.h b/src/osd/MissingLoc.h index ed091bb2113..9bce3cedad0 100644 --- a/src/osd/MissingLoc.h +++ b/src/osd/MissingLoc.h @@ -157,7 +157,8 @@ class MissingLoc { } bool readable_with_acting( const hobject_t &hoid, - const std::set &acting) const; + const std::set &acting, + eversion_t* v = 0) const; uint64_t num_unfound() const { uint64_t ret = 0; for (std::map::const_iterator i = -- 2.39.5