From 9f62ad1c96f79a20627ba137598c85bc05368824 Mon Sep 17 00:00:00 2001 From: sageweil Date: Mon, 19 Nov 2007 22:24:41 +0000 Subject: [PATCH] report failure if we do not receive timely heartbeats from replicas git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@2089 29311d96-e01e-0410-9327-a35deaab8ce9 --- trunk/ceph/config.cc | 1 + trunk/ceph/config.h | 3 +- trunk/ceph/osd/OSD.cc | 80 +++++++++++++++++++++++++++++------------ trunk/ceph/osd/OSD.h | 6 +++- trunk/ceph/osd/OSDMap.h | 10 ++++++ trunk/ceph/osd/PG.h | 1 - 6 files changed, 75 insertions(+), 26 deletions(-) diff --git a/trunk/ceph/config.cc b/trunk/ceph/config.cc index 37006d6f0111a..4bf0b9192972d 100644 --- a/trunk/ceph/config.cc +++ b/trunk/ceph/config.cc @@ -304,6 +304,7 @@ md_config_t g_conf = { osd_age: .8, osd_age_time: 0, osd_heartbeat_interval: 1, + osd_heartbeat_grace: 30, osd_pg_stats_interval: 5, osd_replay_window: 5, osd_max_pull: 2, diff --git a/trunk/ceph/config.h b/trunk/ceph/config.h index ef286a9c86052..3b3008e63bfe7 100644 --- a/trunk/ceph/config.h +++ b/trunk/ceph/config.h @@ -268,7 +268,8 @@ struct md_config_t { bool osd_mkfs; float osd_age; int osd_age_time; - int osd_heartbeat_interval; + int osd_heartbeat_interval; + int osd_heartbeat_grace; int osd_pg_stats_interval; int osd_replay_window; int osd_max_pull; diff --git a/trunk/ceph/osd/OSD.cc b/trunk/ceph/osd/OSD.cc index d43da3780d1b6..77451c086926d 100644 --- a/trunk/ceph/osd/OSD.cc +++ b/trunk/ceph/osd/OSD.cc @@ -288,7 +288,8 @@ int OSD::init() osd_logtype.add_inc("rlnum"); osd_logtype.add_set("numpg"); - osd_logtype.add_set("pingset"); + osd_logtype.add_set("hbto"); + osd_logtype.add_set("hbfrom"); osd_logtype.add_set("buf"); @@ -708,11 +709,36 @@ void OSD::take_peer_stat(int peer, const osd_peer_stat_t& stat) peer_stat[peer] = stat; } +void OSD::update_heartbeat_sets() +{ + // build heartbeat to/from set + heartbeat_to.clear(); + heartbeat_from.clear(); + for (hash_map::iterator i = pg_map.begin(); + i != pg_map.end(); + i++) { + PG *pg = i->second; + + // replicas ping primary. + if (pg->get_role() > 0) { + assert(pg->acting.size() > 1); + heartbeat_to.insert(pg->acting[0]); + } + else if (pg->get_role() == 0) { + assert(pg->acting[0] == whoami); + for (unsigned i=1; iacting.size(); i++) { + assert(pg->acting[i] != whoami); + heartbeat_from.insert(pg->acting[i]); + } + } + } + dout(10) << "hb to: " << heartbeat_to << dendl; + dout(10) << "hb from: " << heartbeat_from << dendl; +} + void OSD::heartbeat() { utime_t now = g_clock.now(); - utime_t since = now; - since.sec_ref() -= g_conf.osd_heartbeat_interval; // get CPU load avg ifstream in("/proc/loadavg"); @@ -726,30 +752,15 @@ void OSD::heartbeat() // calc my stats Mutex::Locker lock(peer_stat_lock); _refresh_my_stat(now); + my_stat_on_peer.clear(); dout(5) << "heartbeat: " << my_stat << dendl; //load_calc.set_size(stat_ops); - // send pings - set pingset; - for (hash_map::iterator i = pg_map.begin(); - i != pg_map.end(); - i++) { - PG *pg = i->second; - - // we want to ping the primary. - if (pg->get_role() <= 0) continue; - if (pg->acting.size() < 1) continue; - - if (pg->last_heartbeat < since) { - pg->last_heartbeat = now; - pingset.insert(pg->acting[0]); - } - } - my_stat_on_peer.clear(); - for (set::iterator i = pingset.begin(); - i != pingset.end(); + // send heartbeats + for (set::iterator i = heartbeat_to.begin(); + i != heartbeat_to.end(); i++) { _share_map_outgoing( osdmap->get_inst(*i) ); my_stat_on_peer[*i] = my_stat; @@ -757,7 +768,27 @@ void OSD::heartbeat() osdmap->get_inst(*i)); } - if (logger) logger->set("pingset", pingset.size()); + // check for incoming heartbeats (move me elsewhere?) + utime_t grace = now; + grace -= g_conf.osd_heartbeat_grace; + for (set::iterator p = heartbeat_from.begin(); + p != heartbeat_from.end(); + p++) { + if (heartbeat_from_stamp.count(*p)) { + if (heartbeat_from_stamp[*p] < grace) { + dout(0) << "no heartbeat from osd" << *p << " since " << heartbeat_from_stamp[*p] + << " (cutoff " << grace << ")" << dendl; + int mon = monmap->pick_mon(); + messenger->send_message(new MOSDFailure(messenger->get_myinst(), osdmap->get_inst(*p), osdmap->get_epoch()), + monmap->get_inst(mon)); + } + } else + heartbeat_from_stamp[*p] = now; // fake initial + } + + + if (logger) logger->set("hbto", heartbeat_to.size()); + if (logger) logger->set("hbfrom", heartbeat_from.size()); // hack: fake reorg? if (osdmap && g_conf.fake_osdmap_updates) { @@ -1064,6 +1095,7 @@ void OSD::handle_osd_ping(MOSDPing *m) int from = m->get_source().num(); take_peer_stat(from, m->peer_stat); + heartbeat_from_stamp[from] = m->get_recv_stamp(); delete m; } @@ -1510,6 +1542,8 @@ void OSD::activate_map(ObjectStore::Transaction& t) do_activators(activator_map); logger->set("numpg", pg_map.size()); + + update_heartbeat_sets(); } diff --git a/trunk/ceph/osd/OSD.h b/trunk/ceph/osd/OSD.h index be6348eceb126..f49f22e1aef95 100644 --- a/trunk/ceph/osd/OSD.h +++ b/trunk/ceph/osd/OSD.h @@ -92,7 +92,11 @@ public: private: - // heartbeat + // -- heartbeat -- + set heartbeat_to, heartbeat_from; + map heartbeat_from_stamp; + + void update_heartbeat_sets(); void heartbeat(); class C_Heartbeat : public Context { diff --git a/trunk/ceph/osd/OSDMap.h b/trunk/ceph/osd/OSDMap.h index 136d449f39d0f..b40ecdf291497 100644 --- a/trunk/ceph/osd/OSDMap.h +++ b/trunk/ceph/osd/OSDMap.h @@ -211,6 +211,16 @@ private: return false; } + int get_any_up_osd() { + for (set::iterator p = osds.begin(); + p != osds.end(); + p++) { + if (is_up(*p)) + return *p; + } + return -1; + } + void mark_down(int o, bool clean) { down_osds[o] = clean; } void mark_up(int o) { down_osds.erase(o); } void mark_out(int o) { diff --git a/trunk/ceph/osd/PG.h b/trunk/ceph/osd/PG.h index 0e14ea3a2ed63..4fac4a468832e 100644 --- a/trunk/ceph/osd/PG.h +++ b/trunk/ceph/osd/PG.h @@ -451,7 +451,6 @@ public: IndexedLog log; OndiskLog ondisklog; Missing missing; - utime_t last_heartbeat; // protected: int role; // 0 = primary, 1 = replica, -1=none. -- 2.39.5