static const int HEAD_VERSION = 3;
public:
+ enum {
+ FLAG_ALIVE = 0, // no bits set: send this on its own to say "I'm still alive" (it is the absence of FLAG_FAILED, so it cannot be tested with &)
+ FLAG_FAILED = 1, // bit 0: set = failure report; clear = recovery
+ FLAG_IMMEDIATE = 2, // bit 1: known failure, not a timeout
+ };
+
uuid_d fsid;
entity_inst_t target_osd;
- __u8 is_failed;
+ __u8 flags;
epoch_t epoch;
int32_t failed_for; // known to be failed since at least this long
MOSDFailure() : PaxosServiceMessage(MSG_OSD_FAILURE, 0, HEAD_VERSION) { }
MOSDFailure(const uuid_d &fs, const entity_inst_t& f, int duration, epoch_t e)
: PaxosServiceMessage(MSG_OSD_FAILURE, e, HEAD_VERSION),
- fsid(fs), target_osd(f), is_failed(true), epoch(e), failed_for(duration) { }
+ fsid(fs), target_osd(f),
+ flags(FLAG_FAILED), // plain failure report: only FLAG_FAILED is set, FLAG_IMMEDIATE is not
+ epoch(e), failed_for(duration) { }
+ MOSDFailure(const uuid_d &fs, const entity_inst_t& f, int duration,
+ epoch_t e, __u8 extra_flags)
+ : PaxosServiceMessage(MSG_OSD_FAILURE, e, HEAD_VERSION),
+ fsid(fs), target_osd(f),
+ flags(extra_flags), // stored verbatim, NOT OR'ed with FLAG_FAILED: pass the complete flag set (FLAG_ALIVE yields a recovery message)
+ epoch(e), failed_for(duration) { }
private:
~MOSDFailure() {}
public:
entity_inst_t get_target() { return target_osd; }
- bool if_osd_failed() { return is_failed; }
+  // True when the FLAG_FAILED bit is set in flags, i.e. this message reports
+  // a failure; false means it reports recovery.
+  bool if_osd_failed() const {
+    return (flags & FLAG_FAILED) != 0;
+  }
+  // True when the FLAG_IMMEDIATE bit is set in flags (a known failure rather
+  // than a timeout).
+  bool is_immediate() const {
+    return (flags & FLAG_IMMEDIATE) != 0;
+  }
epoch_t get_epoch() { return epoch; }
void decode_payload() {
::decode(target_osd, p);
::decode(epoch, p);
if (header.version >= 2)
- ::decode(is_failed, p);
+ ::decode(flags, p);
else
- is_failed = true;
+ flags = FLAG_FAILED;
if (header.version >= 3)
::decode(failed_for, p);
else
::encode(fsid, payload);
::encode(target_osd, payload, features);
::encode(epoch, payload);
- ::encode(is_failed, payload);
+ ::encode(flags, payload);
::encode(failed_for, payload);
}
const char *get_type_name() const { return "osd_failure"; }
void print(ostream& out) const {
out << "osd_failure("
- << (is_failed ? "failed " : "recovered ")
+ << (if_osd_failed() ? "failed " : "recovered ")
+ << (is_immediate() ? "immediate " : "timeout ") // NOTE(review): emitted unconditionally, so a recovery message (flags == 0) prints "recovered timeout "
<< target_osd << " for " << failed_for << "sec e" << epoch
<< " v" << version << ")";
}
return false;
}
+// Queue a CEPH_OSD_UP state change for target_osd in pending_inc and record
+// it in the cluster log.  If such a change is already queued, only a debug
+// line is emitted.  `now` is accepted but not read by this function.
+// NOTE(review): presumably new_state is applied as a toggle mask, so this
+// marks an up OSD down -- confirm against OSDMap::Incremental.
+void OSDMonitor::force_failure(utime_t now, int target_osd)
+{
+  // count() is checked first so operator[] never inserts a fresh entry here.
+  const bool already_pending =
+    pending_inc.new_state.count(target_osd) &&
+    (pending_inc.new_state[target_osd] & CEPH_OSD_UP);
+
+  if (already_pending) {
+    dout(10) << " already pending failure" << dendl;
+    return;
+  }
+
+  dout(1) << " we're forcing failure of osd." << target_osd << dendl;
+
+  // Plain assignment: replaces whatever bits were queued for this osd.
+  pending_inc.new_state[target_osd] = CEPH_OSD_UP;
+
+  mon->clog->info() << osdmap.get_inst(target_osd) << " failed (forced)\n";
+}
+
bool OSDMonitor::prepare_failure(MonOpRequestRef op)
{
op->mark_osdmon_event(__func__);
if (m->if_osd_failed()) {
// add a report
+ if (m->is_immediate()) {
+ mon->clog->debug() << m->get_target() << " reported immediately failed by "
+ << m->get_orig_source_inst() << "\n";
+ force_failure(now, target_osd);
+ return true;
+ }
mon->clog->debug() << m->get_target() << " reported failed by "
- << m->get_orig_source_inst() << "\n";
+ << m->get_orig_source_inst() << "\n";
+
failure_info_t& fi = failure_info[target_osd];
MonOpRequestRef old_op = fi.add_report(reporter, failed_since, op);
if (old_op) {
bool check_failures(utime_t now);
bool check_failure(utime_t now, int target_osd, failure_info_t& fi);
+ void force_failure(utime_t now, int target_osd); // queue a CEPH_OSD_UP change for target_osd in pending_inc unless one is already pending
// map thrashing
int thrash_map;
if (monc && (type == CEPH_ENTITY_TYPE_OSD)) {
OSDMapRef osdmap = get_osdmap();
if (osdmap) {
- int id = osdmap->identify_osd(con->get_peer_addr());
- if (osdmap->is_up(id)) {
+ int id = osdmap->identify_osd_on_all_channels(con->get_peer_addr());
+ if (id >= 0 && osdmap->is_up(id)) {
// I'm cheating mon heartbeat grace logic, because we know it's not going
// to respawn alone. +1 so we won't hit any boundary case.
monc->send_mon_message(new MOSDFailure(monc->get_fsid(),
void OSD::send_still_alive(epoch_t epoch, const entity_inst_t &i)
{
- MOSDFailure *m = new MOSDFailure(monc->get_fsid(), i, 0, epoch);
- m->is_failed = false;
+ MOSDFailure *m = new MOSDFailure(monc->get_fsid(), i, 0, epoch, MOSDFailure::FLAG_ALIVE); // FLAG_ALIVE is 0, so FLAG_FAILED stays clear; replaces the old "is_failed = false" poke
monc->send_mon_message(m);
}
return -1;
}
+// Scan osd ids 0..max_osd-1 in order and return the first existing one whose
+// get_addr(), get_cluster_addr(), get_hb_back_addr() or get_hb_front_addr()
+// equals addr.  Returns -1 when no such osd is found.
+int OSDMap::identify_osd_on_all_channels(const entity_addr_t& addr) const
+{
+  for (int osd = 0; osd < max_osd; ++osd) {
+    if (!exists(osd))
+      continue;
+    if (get_addr(osd) == addr ||
+        get_cluster_addr(osd) == addr ||
+        get_hb_back_addr(osd) == addr ||
+        get_hb_front_addr(osd) == addr)
+      return osd;
+  }
+  return -1;
+}
+
int OSDMap::find_osd_on_ip(const entity_addr_t& ip) const
{
for (int i=0; i<max_osd; i++)
int identify_osd(const entity_addr_t& addr) const;
int identify_osd(const uuid_d& u) const;
+ int identify_osd_on_all_channels(const entity_addr_t& addr) const; // like identify_osd(addr), but also matches the cluster and heartbeat back/front addresses; -1 if none
bool have_addr(const entity_addr_t& addr) const {
return identify_osd(addr) >= 0;