From 344f112bb36d1f11212b08e9061fdac6b139bb76 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Wed, 20 Sep 2017 15:13:35 +0800 Subject: [PATCH] common: use mono clock for HeartbeatMap to avoid problems when admin sets system clock. see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53901 for the reason why we cannot use atomic. Signed-off-by: Kefu Chai Signed-off-by: Xinze Chi --- src/common/HeartbeatMap.cc | 29 +++++++++++++++++------------ src/common/HeartbeatMap.h | 12 ++++++++---- src/common/WorkQueue.h | 4 ++-- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/common/HeartbeatMap.cc b/src/common/HeartbeatMap.cc index ae1f8e8faae..62a703f9114 100644 --- a/src/common/HeartbeatMap.cc +++ b/src/common/HeartbeatMap.cc @@ -28,7 +28,6 @@ namespace ceph { HeartbeatMap::HeartbeatMap(CephContext *cct) : m_cct(cct), m_rwlock("HeartbeatMap::m_rwlock"), - m_inject_unhealthy_until(0), m_unhealthy_workers(0), m_total_workers(0) { @@ -64,12 +63,11 @@ void HeartbeatMap::remove_worker(const heartbeat_handle_d *h) delete h; } -bool HeartbeatMap::_check(const heartbeat_handle_d *h, const char *who, time_t now) +bool HeartbeatMap::_check(const heartbeat_handle_d *h, const char *who, + ceph::coarse_mono_clock::rep now) { bool healthy = true; - time_t was; - - was = h->timeout; + auto was = h->timeout.load(); if (was && was < now) { ldout(m_cct, 1) << who << " '" << h->name << "'" << " had timed out after " << h->grace << dendl; @@ -86,11 +84,14 @@ bool HeartbeatMap::_check(const heartbeat_handle_d *h, const char *who, time_t n return healthy; } -void HeartbeatMap::reset_timeout(heartbeat_handle_d *h, time_t grace, time_t suicide_grace) +void HeartbeatMap::reset_timeout(heartbeat_handle_d *h, + ceph::coarse_mono_clock::rep grace, + ceph::coarse_mono_clock::rep suicide_grace) { ldout(m_cct, 20) << "reset_timeout '" << h->name << "' grace " << grace << " suicide " << suicide_grace << dendl; - time_t now = time(NULL); + auto now = chrono::duration_cast( + ceph::coarse_mono_clock::now().time_since_epoch()).count(); _check(h, "reset_timeout", now); h->timeout = now + grace; @@ -106,7 +107,8 @@ void HeartbeatMap::reset_timeout(heartbeat_handle_d *h, time_t grace, time_t sui void HeartbeatMap::clear_timeout(heartbeat_handle_d *h) { ldout(m_cct, 20) << "clear_timeout '" << h->name << "'" << dendl; - time_t now = time(NULL); + auto now = chrono::duration_cast( + ceph::coarse_mono_clock::now().time_since_epoch()).count(); _check(h, "clear_timeout", now); h->timeout = 0; h->suicide_timeout = 0; @@ -117,16 +119,18 @@ bool HeartbeatMap::is_healthy() int unhealthy = 0; int total = 0; m_rwlock.get_read(); - time_t now = time(NULL); + auto now = ceph::coarse_mono_clock::now(); if (m_cct->_conf->heartbeat_inject_failure) { ldout(m_cct, 0) << "is_healthy injecting failure for next " << m_cct->_conf->heartbeat_inject_failure << " seconds" << dendl; - m_inject_unhealthy_until = now + m_cct->_conf->heartbeat_inject_failure; + m_inject_unhealthy_until = now + std::chrono::seconds(m_cct->_conf->heartbeat_inject_failure); m_cct->_conf->set_val("heartbeat_inject_failure", "0"); } bool healthy = true; if (now < m_inject_unhealthy_until) { - ldout(m_cct, 0) << "is_healthy = false, injected failure for next " << (m_inject_unhealthy_until - now) << " seconds" << dendl; + auto sec = std::chrono::duration_cast(m_inject_unhealthy_until - now).count(); + ldout(m_cct, 0) << "is_healthy = false, injected failure for next " + << sec << " seconds" << dendl; healthy = false; } @@ -134,7 +138,8 @@ bool HeartbeatMap::is_healthy() p != m_workers.end(); ++p) { heartbeat_handle_d *h = *p; - if (!_check(h, "is_healthy", now)) { + auto epoch = chrono::duration_cast(now.time_since_epoch()).count(); + if (!_check(h, "is_healthy", epoch)) { healthy = false; unhealthy++; } diff --git a/src/common/HeartbeatMap.h b/src/common/HeartbeatMap.h index 4e9b314667f..f7ffd9eb620 100644 --- a/src/common/HeartbeatMap.h +++ b/src/common/HeartbeatMap.h @@ -18,9 +18,9 @@ #include #include #include - #include +#include "common/ceph_time.h" #include "RWLock.h" class CephContext; @@ -41,6 +41,7 @@ namespace ceph { struct heartbeat_handle_d { const std::string name; pthread_t thread_id; + // TODO: use atomic, once we can ditch GCC 4.8 std::atomic timeout = { 0 }, suicide_timeout = { 0 }; time_t grace, suicide_grace; std::list::iterator list_item; @@ -57,7 +58,9 @@ class HeartbeatMap { void remove_worker(const heartbeat_handle_d *h); // reset the timeout so that it expects another touch within grace amount of time - void reset_timeout(heartbeat_handle_d *h, time_t grace, time_t suicide_grace); + void reset_timeout(heartbeat_handle_d *h, + ceph::coarse_mono_clock::rep grace, + ceph::coarse_mono_clock::rep suicide_grace); // clear the timeout so that it's not checked on void clear_timeout(heartbeat_handle_d *h); @@ -79,12 +82,13 @@ class HeartbeatMap { private: CephContext *m_cct; RWLock m_rwlock; - time_t m_inject_unhealthy_until; + ceph::coarse_mono_clock::time_point m_inject_unhealthy_until; std::list m_workers; std::atomic m_unhealthy_workers = { 0 }; std::atomic m_total_workers = { 0 }; - bool _check(const heartbeat_handle_d *h, const char *who, time_t now); + bool _check(const heartbeat_handle_d *h, const char *who, + ceph::coarse_mono_clock::rep now); }; } diff --git a/src/common/WorkQueue.h b/src/common/WorkQueue.h index d3eff47bdf8..fcdd43b35d7 100644 --- a/src/common/WorkQueue.h +++ b/src/common/WorkQueue.h @@ -42,8 +42,8 @@ public: friend class ThreadPool; CephContext *cct; heartbeat_handle_d *hb; - time_t grace; - time_t suicide_grace; + ceph::coarse_mono_clock::rep grace; + ceph::coarse_mono_clock::rep suicide_grace; public: TPHandle( CephContext *cct, -- 2.39.5