From fca6817d7ed1534157d04e2100313e1c2e222e21 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Piotr=20Da=C5=82ek?= Date: Thu, 5 May 2016 21:48:31 +0200 Subject: [PATCH] messages/MOSDFailure.h: distinguish between timeout and immediate failure MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Change "is_failed" field to "flags" and use it to distinguish between timeout and immediate, known OSD failure. Then use that in OSD and MON, and make sure "min_reporters" doesn't affect known failures by actually bypassing the failure heuristic code. Signed-off-by: Piotr Dałek --- src/messages/MOSDFailure.h | 34 +++++++++++++++++++++++++++------- src/mon/OSDMonitor.cc | 25 ++++++++++++++++++++++++- src/mon/OSDMonitor.h | 1 + src/osd/OSD.cc | 7 +++---- src/osd/OSDMap.cc | 9 +++++++++ src/osd/OSDMap.h | 1 + 6 files changed, 65 insertions(+), 12 deletions(-) diff --git a/src/messages/MOSDFailure.h b/src/messages/MOSDFailure.h index d1d9d9cc6a27a..2b51a620a6f7b 100644 --- a/src/messages/MOSDFailure.h +++ b/src/messages/MOSDFailure.h @@ -24,22 +24,41 @@ class MOSDFailure : public PaxosServiceMessage { static const int HEAD_VERSION = 3; public: + enum { + FLAG_ALIVE = 0, // use this on its own to mark as "I'm still alive" + FLAG_FAILED = 1, // if set, failure; if not, recovery + FLAG_IMMEDIATE = 2, // known failure, not a timeout + }; + uuid_d fsid; entity_inst_t target_osd; - __u8 is_failed; + __u8 flags; epoch_t epoch; int32_t failed_for; // known to be failed since at least this long MOSDFailure() : PaxosServiceMessage(MSG_OSD_FAILURE, 0, HEAD_VERSION) { } MOSDFailure(const uuid_d &fs, const entity_inst_t& f, int duration, epoch_t e) : PaxosServiceMessage(MSG_OSD_FAILURE, e, HEAD_VERSION), - fsid(fs), target_osd(f), is_failed(true), epoch(e), failed_for(duration) { } + fsid(fs), target_osd(f), + flags(FLAG_FAILED), + epoch(e), failed_for(duration) { } + MOSDFailure(const uuid_d &fs, const entity_inst_t& f, int duration, + epoch_t e, __u8 extra_flags) + 
: PaxosServiceMessage(MSG_OSD_FAILURE, e, HEAD_VERSION), + fsid(fs), target_osd(f), + flags(extra_flags), + epoch(e), failed_for(duration) { } private: ~MOSDFailure() {} public: entity_inst_t get_target() { return target_osd; } - bool if_osd_failed() { return is_failed; } + bool if_osd_failed() const { + return flags & FLAG_FAILED; + } + bool is_immediate() const { + return flags & FLAG_IMMEDIATE; + } epoch_t get_epoch() { return epoch; } void decode_payload() { @@ -49,9 +68,9 @@ public: ::decode(target_osd, p); ::decode(epoch, p); if (header.version >= 2) - ::decode(is_failed, p); + ::decode(flags, p); else - is_failed = true; + flags = FLAG_FAILED; if (header.version >= 3) ::decode(failed_for, p); else @@ -63,14 +82,15 @@ public: ::encode(fsid, payload); ::encode(target_osd, payload, features); ::encode(epoch, payload); - ::encode(is_failed, payload); + ::encode(flags, payload); ::encode(failed_for, payload); } const char *get_type_name() const { return "osd_failure"; } void print(ostream& out) const { out << "osd_failure(" - << (is_failed ? "failed " : "recovered ") + << (if_osd_failed() ? "failed " : "recovered ") + << (is_immediate() ? "immediate " : "timeout ") << target_osd << " for " << failed_for << "sec e" << epoch << " v" << version << ")"; } diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index bfdaf1ef68ec2..a19544a9e3f4f 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1820,6 +1820,22 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi) return false; } +void OSDMonitor::force_failure(utime_t now, int target_osd) +{ + // already pending failure? + if (pending_inc.new_state.count(target_osd) && + pending_inc.new_state[target_osd] & CEPH_OSD_UP) { + dout(10) << " already pending failure" << dendl; + return; + } + + dout(1) << " we're forcing failure of osd." 
<< target_osd << dendl; + pending_inc.new_state[target_osd] = CEPH_OSD_UP; + + mon->clog->info() << osdmap.get_inst(target_osd) << " failed (forced)\n"; + return; +} + bool OSDMonitor::prepare_failure(MonOpRequestRef op) { op->mark_osdmon_event(__func__); @@ -1841,8 +1857,15 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op) if (m->if_osd_failed()) { // add a report + if (m->is_immediate()) { + mon->clog->debug() << m->get_target() << " reported immediately failed by " + << m->get_orig_source_inst() << "\n"; + force_failure(now, target_osd); + return true; + } mon->clog->debug() << m->get_target() << " reported failed by " - << m->get_orig_source_inst() << "\n"; + << m->get_orig_source_inst() << "\n"; + failure_info_t& fi = failure_info[target_osd]; MonOpRequestRef old_op = fi.add_report(reporter, failed_since, op); if (old_op) { diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 7a066699eca21..c1d6031755359 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -128,6 +128,7 @@ private: bool check_failures(utime_t now); bool check_failure(utime_t now, int target_osd, failure_info_t& fi); + void force_failure(utime_t now, int target_osd); // map thrashing int thrash_map; diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 121e5abd0969a..df010420b3d0f 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4790,8 +4790,8 @@ bool OSD::ms_handle_refused(Connection *con) if (monc && (type == CEPH_ENTITY_TYPE_OSD)) { OSDMapRef osdmap = get_osdmap(); if (osdmap) { - int id = osdmap->identify_osd(con->get_peer_addr()); - if (osdmap->is_up(id)) { + int id = osdmap->identify_osd_on_all_channels(con->get_peer_addr()); + if (id >= 0 && osdmap->is_up(id)) { // I'm cheating mon heartbeat grace logic, because we know it's not going // to respawn alone. +1 so we won't hit any boundary case. 
monc->send_mon_message(new MOSDFailure(monc->get_fsid(), @@ -5116,8 +5116,7 @@ void OSD::send_failures() void OSD::send_still_alive(epoch_t epoch, const entity_inst_t &i) { - MOSDFailure *m = new MOSDFailure(monc->get_fsid(), i, 0, epoch); - m->is_failed = false; + MOSDFailure *m = new MOSDFailure(monc->get_fsid(), i, 0, epoch, MOSDFailure::FLAG_ALIVE); monc->send_mon_message(m); } diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 03aac656cb57c..095564e418159 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -1007,6 +1007,15 @@ int OSDMap::identify_osd(const uuid_d& u) const return -1; } +int OSDMap::identify_osd_on_all_channels(const entity_addr_t& addr) const +{ + for (int i=0; i= 0; -- 2.39.5