From d2279d57ecd6d98e872b0bb35852f2fd4baa3f45 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Mon, 1 Jun 2020 18:50:00 +0800 Subject: [PATCH] crimson/osd: send "mark me down" messages to monitor when shutting down Signed-off-by: Xuehan Xu --- src/crimson/osd/osd.cc | 115 ++++++++++++++++++++++++++++------------ src/crimson/osd/osd.h | 9 +++- src/crimson/osd/state.h | 8 +++ 3 files changed, 96 insertions(+), 36 deletions(-) diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 5a082d6c756..0bf20f0f2b3 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include "common/pick_address.h" #include "include/util.h" @@ -19,6 +20,7 @@ #include "messages/MOSDBeacon.h" #include "messages/MOSDBoot.h" #include "messages/MOSDMap.h" +#include "messages/MOSDMarkMeDown.h" #include "messages/MOSDOp.h" #include "messages/MOSDPGLog.h" #include "messages/MOSDPGPull.h" @@ -457,42 +459,44 @@ seastar::future<> OSD::stop() { logger().info("stop"); // see also OSD::shutdown() - state.set_stopping(); - - if (!public_msgr->dispatcher_chain_empty()) { - public_msgr->remove_dispatcher(*this); - public_msgr->remove_dispatcher(*mgrc); - public_msgr->remove_dispatcher(*monc); - } - if (!cluster_msgr->dispatcher_chain_empty()) { - cluster_msgr->remove_dispatcher(*this); - cluster_msgr->remove_dispatcher(*mgrc); - cluster_msgr->remove_dispatcher(*monc); - } - auto gate_close_fut = gate.close(); - return asok->stop().then([this] { - return heartbeat->stop(); - }).then([this] { - return store->umount(); - }).then([this] { - return store->stop(); - }).then([this] { - return seastar::parallel_for_each(pg_map.get_pgs(), - [](auto& p) { - return p.second->stop(); + return prepare_to_stop().then([this] { + state.set_stopping(); + logger().debug("prepared to stop"); + if (!public_msgr->dispatcher_chain_empty()) { + public_msgr->remove_dispatcher(*this); + public_msgr->remove_dispatcher(*mgrc); + public_msgr->remove_dispatcher(*monc); + } + if (!cluster_msgr->dispatcher_chain_empty()) { + cluster_msgr->remove_dispatcher(*this); + cluster_msgr->remove_dispatcher(*mgrc); + cluster_msgr->remove_dispatcher(*monc); + } + auto gate_close_fut = gate.close(); + return asok->stop().then([this] { + return heartbeat->stop(); + }).then([this] { + return store->umount(); + }).then([this] { + return store->stop(); + }).then([this] { + return seastar::parallel_for_each(pg_map.get_pgs(), + [](auto& p) { + return p.second->stop(); + }); + }).then([this] { + return monc->stop(); + }).then([this] { + return mgrc->stop(); + }).then([fut=std::move(gate_close_fut)]() mutable { + return std::move(fut); + }).then([this] { + return when_all_succeed( + public_msgr->shutdown(), + cluster_msgr->shutdown()); + }).handle_exception([](auto ep) { + logger().error("error while stopping osd: {}", ep); }); - }).then([this] { - return monc->stop(); - }).then([this] { - return mgrc->stop(); - }).then([fut=std::move(gate_close_fut)]() mutable { - return std::move(fut); - }).then([this] { - return when_all_succeed( - public_msgr->shutdown(), - cluster_msgr->shutdown()); - }).handle_exception([](auto ep) { - logger().error("error while stopping osd: {}", ep); }); } @@ -624,6 +628,8 @@ seastar::future<> OSD::ms_dispatch(crimson::net::Connection* conn, MessageRef m) return seastar::now(); case MSG_COMMAND: return handle_command(conn, boost::static_pointer_cast(m)); + case MSG_OSD_MARK_ME_DOWN: + return handle_mark_me_down(conn, boost::static_pointer_cast(m)); case MSG_OSD_PG_PULL: [[fallthrough]]; case MSG_OSD_PG_PUSH: @@ -970,6 +976,11 @@ seastar::future<> OSD::committed_osd_maps(version_t first, heartbeat_timer.arm_periodic( std::chrono::seconds(TICK_INTERVAL)); } + } else if (!osdmap->is_up(whoami)) { + if (state.is_prestop()) { + got_stop_ack(); + return seastar::now(); + } } check_osdmap_features(); // yay! @@ -1036,6 +1047,15 @@ seastar::future<> OSD::handle_rep_op_reply(crimson::net::Connection* conn, return seastar::now(); } +seastar::future<> OSD::handle_mark_me_down(crimson::net::Connection* conn, + Ref m) +{ + if (state.is_prestop()) { + got_stop_ack(); + } + return seastar::now(); +} + seastar::future<> OSD::handle_recovery_subreq(crimson::net::Connection* conn, Ref m) { @@ -1180,4 +1200,29 @@ blocking_future> OSD::wait_for_pg( return pg_map.get_pg(pgid).first; } +seastar::future<> OSD::prepare_to_stop() +{ + if (osdmap && osdmap->is_up(whoami)) { + state.set_prestop(); + return monc->send_message( + make_message( + monc->get_fsid(), + whoami, + osdmap->get_addrs(whoami), + osdmap->get_epoch(), + true)).then([this] { + const auto timeout = + std::chrono::duration_cast( + std::chrono::duration( + local_conf().get_val("osd_mon_shutdown_timeout"))); + return seastar::with_timeout( + seastar::timer<>::clock::now() + timeout, + stop_acked.get_future()); + }).handle_exception_type([this](seastar::timed_out_error&) { + return seastar::now(); + }); + } + return seastar::now(); +} + } diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index 05d5ad9adfc..cb6c0db3b04 100644 --- a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -181,7 +181,8 @@ private: Ref m); seastar::future<> handle_recovery_subreq(crimson::net::Connection* conn, Ref m); - + seastar::future<> handle_mark_me_down(crimson::net::Connection* conn, + Ref m); seastar::future<> committed_osd_maps(version_t first, version_t last, @@ -205,6 +206,12 @@ public: private: PGMap pg_map; crimson::common::Gated gate; + + seastar::promise<> stop_acked; + void got_stop_ack() { + stop_acked.set_value(); + } + seastar::future<> prepare_to_stop(); public: blocking_future> get_or_create_pg( spg_t pgid, diff --git a/src/crimson/osd/state.h b/src/crimson/osd/state.h index 4c445348d30..ba48cd36f12 100644 --- a/src/crimson/osd/state.h +++ b/src/crimson/osd/state.h @@ -15,6 +15,7 @@ class OSDState { PREBOOT, BOOTING, ACTIVE, + PRESTOP, STOPPING, WAITING_FOR_HEALTHY, }; @@ -34,6 +35,9 @@ public: bool is_active() const { return state == State::ACTIVE; } + bool is_prestop() const { + return state == State::PRESTOP; + } bool is_stopping() const { return state == State::STOPPING; } @@ -49,6 +53,9 @@ public: void set_active() { state = State::ACTIVE; } + void set_prestop() { + state = State::PRESTOP; + } void set_stopping() { state = State::STOPPING; } @@ -58,6 +65,7 @@ public: case State::PREBOOT: return "preboot"; case State::BOOTING: return "booting"; case State::ACTIVE: return "active"; + case State::PRESTOP: return "prestop"; case State::STOPPING: return "stopping"; case State::WAITING_FOR_HEALTHY: return "waiting_for_healthy"; default: return "???"; -- 2.39.5