Include in each OSD failure report how long the target has been failed
(failed_for). The monitor will need this to dynamically adjust the
heartbeat grace.
Closes: #3044
Signed-off-by: Sage Weil <sage@inktank.com>
// MOSDFailure: PaxosServiceMessage of type MSG_OSD_FAILURE that reports on
// target_osd. This change raises HEAD_VERSION from 2 to 3 and adds failed_for,
// which is appended to the payload and read back only when header.version >= 3.
class MOSDFailure : public PaxosServiceMessage {
// Version bump that accompanies the new trailing failed_for field.
- static const int HEAD_VERSION = 2;
+ static const int HEAD_VERSION = 3;
public:
uuid_d fsid;
entity_inst_t target_osd;
// Set to true by the reporting constructor; when decoding a message older
// than version 2 it is also defaulted to true.
__u8 is_failed;
epoch_t epoch;
// Presumably whole seconds (the sender truncates a time difference to int)
// -- TODO confirm units.
+ int32_t failed_for; // known to be failed since at least this long
// Default constructor: only the base class is initialized explicitly; the
// scalar members (is_failed, failed_for) are not set here.
// NOTE(review): presumably used for messages that are then filled in by
// decoding -- confirm.
MOSDFailure() : PaxosServiceMessage(MSG_OSD_FAILURE, 0, HEAD_VERSION) { }
// Reporting constructor: marks f as failed (is_failed = true), stores
// `duration` as failed_for, and uses e both as the stored epoch and as the
// second argument to the PaxosServiceMessage constructor.
- MOSDFailure(const uuid_d &fs, entity_inst_t f, epoch_t e)
+ MOSDFailure(const uuid_d &fs, entity_inst_t f, int duration, epoch_t e)
: PaxosServiceMessage(MSG_OSD_FAILURE, e, HEAD_VERSION),
- fsid(fs), target_osd(f), is_failed(true), epoch(e) { }
+ fsid(fs), target_osd(f), is_failed(true), epoch(e), failed_for(duration) { }
private:
~MOSDFailure() {}
// Decode path (the enclosing function header is not part of this excerpt).
// Fields are read in the order fsid, target_osd, epoch, then the
// version-gated fields.
::decode(fsid, p);
::decode(target_osd, p);
::decode(epoch, p);
// is_failed only exists on the wire from version 2 on; older senders are
// treated as reporting a failure.
- if (header.version >=2)
+ if (header.version >= 2)
::decode(is_failed, p);
else
is_failed = true;
// failed_for only exists on the wire from version 3 on; older senders are
// treated as reporting a duration of 0.
+ if (header.version >= 3)
+ ::decode(failed_for, p);
+ else
+ failed_for = 0;
}
// Encodes the payload; failed_for is appended last, matching the position at
// which the decode path reads it.
// NOTE(review): the decode path reads fsid first, but no matching encode of
// fsid is visible in this excerpt -- confirm against the full file.
void encode_payload(uint64_t features) {
paxos_encode();
::encode(target_osd, payload);
::encode(epoch, payload);
::encode(is_failed, payload);
+ ::encode(failed_for, payload);
}
const char *get_type_name() const { return "osd_failure"; }
// Human-readable summary; the new form also prints failed_for after " for ".
void print(ostream& out) const {
- out << "osd_failure(" << target_osd << " e" << epoch << " v" << version << ")";
+ out << "osd_failure(" << target_osd << " for " << failed_for << " e" << epoch << " v" << version << ")";
}
};
// Tail of the branch for a peer that has never replied: log it, then queue it
// as failed. (The enclosing function and loop are not part of this excerpt.)
derr << "heartbeat_check: no reply from osd." << p->first
<< " ever, first ping sent " << p->second.first_tx
<< " (cutoff " << cutoff << ")" << dendl;
+
+ // fail
// The stored timestamp is what the failure-sending loop later subtracts from
// the current time to compute failed_for.
// NOTE(review): the log line above reports first_tx, but last_tx is what gets
// stored; if last_tx is refreshed on every ping, failed_for would be
// understated for a peer that never answered -- confirm the intended timestamp.
+ failure_queue[p->first] = p->second.last_tx;
} else {
// A reply newer than the cutoff means this peer is skipped.
if (p->second.last_rx > cutoff)
continue; // got recent reply
derr << "heartbeat_check: no reply from osd." << p->first
<< " since " << p->second.last_rx
<< " (cutoff " << cutoff << ")" << dendl;
// The old code queued the failure once, after both branches, without a
// timestamp; the new code queues inside each branch with a timestamp.
- }
- // fail!
- queue_failure(p->first);
+ // fail
// Queue the peer keyed by its id, remembering the time of its last reply.
+ failure_queue[p->first] = p->second.last_rx;
+ }
}
}
heartbeat_lock.Lock();
locked = true;
}
// Drain failure_queue: for each queued osd, send a failure report to the
// monitor that includes how long the osd has been failing, then record the
// reported instance in failure_pending.
// A single clock sample is shared by every report sent in this pass.
+ utime_t now = ceph_clock_now(g_ceph_context);
while (!failure_queue.empty()) {
// failure_queue changed from a set of ids to a map of id -> timestamp, so the
// id is now the key of the first entry.
- int osd = *failure_queue.begin();
+ int osd = failure_queue.begin()->first;
// Elapsed time since the queued timestamp, converted to double and then
// truncated to int (presumably seconds -- TODO confirm utime_t's units).
+ int failed_for = (int)(double)(now - failure_queue.begin()->second);
entity_inst_t i = osdmap->get_inst(osd);
- monc->send_mon_message(new MOSDFailure(monc->get_fsid(), i, osdmap->get_epoch()));
+ monc->send_mon_message(new MOSDFailure(monc->get_fsid(), i, failed_for, osdmap->get_epoch()));
// Remember which instance was reported, keyed by osd id.
failure_pending[osd] = i;
// Removing the entry is what advances the loop.
failure_queue.erase(osd);
}
// Sends the monitor an MOSDFailure for instance i with is_failed cleared.
//   epoch: epoch passed through to the MOSDFailure constructor.
//   i:     instance the message is about.
void OSD::send_still_alive(epoch_t epoch, entity_inst_t i)
{
// The new constructor takes a failure duration; 0 is passed for this message.
- MOSDFailure *m = new MOSDFailure(monc->get_fsid(), i, epoch);
+ MOSDFailure *m = new MOSDFailure(monc->get_fsid(), i, 0, epoch);
// The constructor always sets is_failed to true, so it is overridden here.
m->is_failed = false;
monc->send_mon_message(m);
}
void send_alive();
// -- failures --
// failure_queue changes from a set of osd ids to a map of osd id -> timestamp
// (the peer's last_tx or last_rx, as assigned by the heartbeat check) so that
// the sender can compute how long each osd has been failing.
- set<int> failure_queue;
+ map<int,utime_t> failure_queue;
// osd id -> instance recorded when a failure report for it is sent.
map<int,entity_inst_t> failure_pending;
// queue_failure() is removed; callers now assign into failure_queue directly
// so they can supply the timestamp.
- void queue_failure(int n) {
- failure_queue.insert(n);
- }
void send_failures();
void send_still_alive(epoch_t epoch, entity_inst_t i);