From: Noah Watkins Date: Fri, 27 Jul 2018 01:36:17 +0000 (-0700) Subject: mon: timely health updates to manager X-Git-Tag: v14.0.1~613^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=842b7992ef9bbc5bb835cf83628f4a9e95a30da4;p=ceph.git mon: timely health updates to manager When the health of a monitor service changes, immediately dispatch a digest to manager daemons with the new health checks rather than waiting for the next scheduled event. This has the added benefit that health checks that are set by the manager daemon are echoed back and observable quickly. Signed-off-by: Noah Watkins --- diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index a7af4241c00d..ccbcb072ccb5 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -149,6 +149,36 @@ health_status_t MgrMonitor::should_warn_about_mgr_down() return HEALTH_OK; } +void MgrMonitor::post_paxos_update() +{ + // are we handling digest subscribers? + if (digest_event) { + bool send = false; + if (prev_health_checks.empty()) { + prev_health_checks.resize(mon->paxos_service.size()); + send = true; + } + assert(prev_health_checks.size() == mon->paxos_service.size()); + for (auto i = 0u; i < prev_health_checks.size(); i++) { + const auto& curr = mon->paxos_service[i]->get_health_checks(); + if (!send && curr != prev_health_checks[i]) { + send = true; + } + prev_health_checks[i] = curr; + } + if (send) { + if (is_active()) { + send_digests(); + } else { + cancel_timer(); + wait_for_active_ctx(new C_MonContext(mon, [this](int) { + send_digests(); + })); + } + } + } +} + void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t) { dout(10) << __func__ << " " << pending_map << dendl; @@ -453,8 +483,10 @@ void MgrMonitor::send_digests() cancel_timer(); const std::string type = "mgrdigest"; - if (mon->session_map.subs.count(type) == 0) + if (mon->session_map.subs.count(type) == 0) { + prev_health_checks.clear(); return; + } if (!is_active()) { // if paxos is 
currently not active, don't send a digest but reenable timer diff --git a/src/mon/MgrMonitor.h b/src/mon/MgrMonitor.h index d73b1f26dd98..b295e4030096 100644 --- a/src/mon/MgrMonitor.h +++ b/src/mon/MgrMonitor.h @@ -57,6 +57,8 @@ class MgrMonitor: public PaxosService Context *digest_event = nullptr; void cancel_timer(); + std::vector<health_check_map_t> prev_health_checks; + bool check_caps(MonOpRequestRef op, const uuid_d& fsid); health_status_t should_warn_about_mgr_down(); @@ -83,6 +85,7 @@ public: void create_initial() override; void get_store_prefixes(std::set<string>& s) const override; void update_from_paxos(bool *need_bootstrap) override; + void post_paxos_update() override; void create_pending() override; void encode_pending(MonitorDBStore::TransactionRef t) override;