From: Kefu Chai
Date: Sun, 3 Feb 2019 11:19:23 +0000 (+0800)
Subject: crimson/osd: update heartbeat peers properly
X-Git-Tag: v14.1.0~159^2~2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fbff008b440533935c660d2a68097870f1bfcef5;p=ceph.git

crimson/osd: update heartbeat peers properly

* add reporters for fast failure detection
* trim heartbeat peers if the number of peers exceeds
  `osd_heartbeat_min_peers`.

Signed-off-by: Kefu Chai
---
Review notes (kept below the "---" so they stay out of the commit message):

* This copy of the patch had lost its line breaks and every "<...>" span.
  Line breaks are rebuilt from the hunk headers, template arguments from
  how each value is used.  The get_val<> option types and the unique_ptr<>
  element type in the heartbeat.h context lines are assumptions; confirm
  them against the tree.  E-mail addresses are not restored.
* update_peers(): the trim loop kept iterators into a continuation
  argument and never advanced them.  It now hands the list and a cursor to
  do_with() and stops once peers.size() <= osd_heartbeat_min_peers.
* remove_down_peers(): the reducer moves its accumulator instead of
  copying it, and no longer captures an unused `this`.
* OSD::update_heartbeat_peers() still drops the future returned by
  update_peers(); chain or handle it in a follow-up.

diff --git a/src/crimson/osd/heartbeat.cc b/src/crimson/osd/heartbeat.cc
index 5f1e769afb20..8f81cc3e7510 100644
--- a/src/crimson/osd/heartbeat.cc
+++ b/src/crimson/osd/heartbeat.cc
@@ -98,6 +98,96 @@ void Heartbeat::add_peer(osd_id_t peer, epoch_t epoch)
   }
 }
 
+/// Remove every peer that the current OSDMap marks as down.
+/// @return the peers that are still up but were recorded in an older epoch
+seastar::future<Heartbeat::osds_t> Heartbeat::remove_down_peers()
+{
+  osds_t osds;
+  osds.reserve(peers.size());
+  for (auto& peer : peers) {
+    osds.push_back(peer.first);
+  }
+  return seastar::map_reduce(std::move(osds),
+    [this](auto& osd) {
+      // -1 stands for "nothing to report for this osd"
+      auto osdmap = service.get_map();
+      if (!osdmap->is_up(osd)) {
+        return remove_peer(osd).then([] {
+          return seastar::make_ready_future<osd_id_t>(-1);
+        });
+      } else if (peers[osd].epoch < osdmap->get_epoch()) {
+        return seastar::make_ready_future<osd_id_t>(osd);
+      } else {
+        return seastar::make_ready_future<osd_id_t>(-1);
+      }
+    }, osds_t{},
+    [](osds_t&& extras, osd_id_t extra) {
+      if (extra >= 0) {
+        extras.push_back(extra);
+      }
+      // extras is a named rvalue reference: move it, do not copy it
+      return std::move(extras);
+    });
+}
+
+/// Add the next and previous up OSDs, plus the reporters picked by
+/// get_random_up_osds_by_subtree(), as peers at the current map epoch.
+void Heartbeat::add_reporter_peers(int whoami)
+{
+  auto osdmap = service.get_map();
+  // include next and previous up osds to ensure we have a fully-connected set
+  set<int> want;
+  if (auto next = osdmap->get_next_up_osd_after(whoami); next >= 0) {
+    want.insert(next);
+  }
+  if (auto prev = osdmap->get_previous_up_osd_before(whoami); prev >= 0) {
+    want.insert(prev);
+  }
+  // make sure we have at least **min_down** osds coming from different
+  // subtree level (e.g., hosts) for fast failure detection.
+  auto min_down = local_conf().get_val<uint64_t>("mon_osd_min_down_reporters");
+  auto subtree = local_conf().get_val<std::string>("mon_osd_reporter_subtree_level");
+  osdmap->get_random_up_osds_by_subtree(
+    whoami, subtree, min_down, want, &want);
+  for (auto osd : want) {
+    add_peer(osd, osdmap->get_epoch());
+  }
+}
+
+/// Refresh the heartbeat peer set against the current OSDMap: drop down
+/// peers, add reporters, then trim or top up towards
+/// `osd_heartbeat_min_peers`.
+seastar::future<> Heartbeat::update_peers(int whoami)
+{
+  const auto min_peers = static_cast<size_t>(
+    local_conf().get_val<int64_t>("osd_heartbeat_min_peers"));
+  return remove_down_peers().then([=](osds_t&& extra) {
+    add_reporter_peers(whoami);
+    // too many?
+    // do_with() owns the candidate list and the cursor until the loop is
+    // done; iterators into `extra` would dangle once this lambda returns.
+    return seastar::do_with(std::move(extra), size_t{0},
+      [=](osds_t& extras, size_t& next) {
+        return seastar::do_until(
+          [min_peers, &extras, &next, this] {
+            return peers.size() <= min_peers || next == extras.size(); },
+          [&extras, &next, this] {
+            // advance the cursor so each candidate is removed only once
+            return remove_peer(extras[next++]); }
+        );
+    });
+  }).then([=] {
+    // or too few?
+    auto osdmap = service.get_map();
+    for (auto next = osdmap->get_next_up_osd_after(whoami);
+         peers.size() < min_peers && next >= 0 && next != whoami;
+         next = osdmap->get_next_up_osd_after(next)) {
+      add_peer(next, osdmap->get_epoch());
+    }
+    return seastar::now();
+  });
+}
+
 seastar::future<> Heartbeat::remove_peer(osd_id_t peer)
 {
   auto found = peers.find(peer);
diff --git a/src/crimson/osd/heartbeat.h b/src/crimson/osd/heartbeat.h
index 2304af440297..916acba2c9a4 100644
--- a/src/crimson/osd/heartbeat.h
+++ b/src/crimson/osd/heartbeat.h
@@ -32,6 +32,7 @@ public:
   seastar::future<> stop();
 
   void add_peer(osd_id_t peer, epoch_t epoch);
+  seastar::future<> update_peers(int whoami);
   seastar::future<> remove_peer(osd_id_t peer);
 
   seastar::future<> send_heartbeats();
@@ -55,6 +56,13 @@ private:
   seastar::future<> send_still_alive(osd_id_t,
                                      const entity_addrvec_t&);
 
+  using osds_t = std::vector<osd_id_t>;
+  /// remove down OSDs
+  /// @return peers not needed in this epoch
+  seastar::future<osds_t> remove_down_peers();
+  /// add enough reporters for fast failure detection
+  void add_reporter_peers(int whoami);
+
 private:
   std::unique_ptr<ceph::net::Messenger> front_msgr;
   std::unique_ptr<ceph::net::Messenger> back_msgr;
diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc
index c064d7676d5e..24f7de8bbd5d 100644
--- a/src/crimson/osd/osd.cc
+++ b/src/crimson/osd/osd.cc
@@ -602,5 +602,5 @@ void OSD::update_heartbeat_peers()
       }
     }
   }
-  // TODO: remove down OSD
+  heartbeat->update_peers(whoami);
 }