From 5625c2126238185859b50ccf502f2b6976d1a51c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 11 Mar 2009 13:42:19 -0700 Subject: [PATCH] mds: rework mds standby framework --- src/cmds.cc | 13 +++---------- src/mds/MDS.cc | 14 ++++++++------ src/mds/MDS.h | 5 +++-- src/mds/MDSMap.cc | 9 +++++++++ src/mds/MDSMap.h | 13 +++++++++++-- src/messages/MMDSBeacon.h | 20 ++++++++++++++------ src/mon/MDSMonitor.cc | 29 +++++++++++------------------ 7 files changed, 59 insertions(+), 44 deletions(-) diff --git a/src/cmds.cc b/src/cmds.cc index 0920a59777f4a..626fd75eb069d 100644 --- a/src/cmds.cc +++ b/src/cmds.cc @@ -52,15 +52,9 @@ int main(int argc, const char **argv) common_init(args, "mds"); // mds specific args - int whoami = -1; - int shadow = -1; const char *name = 0; for (unsigned i=0; istandby_replay_for = shadow; + MDS *mds = new MDS(name, m, &monmap); mds->init(); rank.wait(); diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index 7167291bdfc25..9128143af0503 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -71,11 +71,12 @@ // cons/des -MDS::MDS(const char *n, int whoami_, Messenger *m, MonMap *mm) : +MDS::MDS(const char *n, Messenger *m, MonMap *mm) : mds_lock("MDS::mds_lock"), timer(mds_lock), name(n), - whoami(whoami_), incarnation(0), + whoami(-1), incarnation(0), + standby_for_rank(-1), standby_replay_for(-1), messenger(m), monmap(mm), @@ -345,7 +346,6 @@ int MDS::init() // starting beacon. 
this will induce an MDSMap from the monitor want_state = MDSMap::STATE_BOOT; - want_rank = whoami; beacon_start(); whoami = -1; messenger->reset_myname(entity_name_t::MDS(whoami)); @@ -451,9 +451,11 @@ void MDS::beacon_send() beacon_seq_stamp[beacon_last_seq] = g_clock.now(); - messenger->send_message(new MMDSBeacon(monmap->fsid, name, mdsmap->get_epoch(), - want_state, beacon_last_seq, want_rank), - monmap->get_inst(mon)); + MMDSBeacon *beacon = new MMDSBeacon(monmap->fsid, name, mdsmap->get_epoch(), + want_state, beacon_last_seq); + beacon->set_standby_for_rank(standby_for_rank); + beacon->set_standby_for_name(standby_for_name); + messenger->send_message(beacon, monmap->get_inst(mon)); // schedule next sender if (beacon_sender) timer.cancel_event(beacon_sender); diff --git a/src/mds/MDS.h b/src/mds/MDS.h index 19d90f1762fe5..935edbe2516a0 100644 --- a/src/mds/MDS.h +++ b/src/mds/MDS.h @@ -122,6 +122,8 @@ class MDS : public Dispatcher { int incarnation; int standby_replay_for; + int standby_for_rank; + string standby_for_name; Messenger *messenger; MonMap *monmap; @@ -156,7 +158,6 @@ class MDS : public Dispatcher { // -- MDS state -- int state; // my confirmed state int want_state; // the state i want - int want_rank; // the mds rank i want list waiting_for_active; map > waiting_for_active_peer; @@ -264,7 +265,7 @@ class MDS : public Dispatcher { private: virtual bool dispatch_impl(Message *m); public: - MDS(const char *n, int whoami, Messenger *m, MonMap *mm); + MDS(const char *n, Messenger *m, MonMap *mm); ~MDS(); // who am i etc diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index 217a28b6830d8..d8f48df3e36fc 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -54,6 +54,15 @@ void MDSMap::print(ostream& out) << " seq " << info.state_seq; if (info.laggy()) out << " laggy since " << info.laggy_since; + if (info.standby_for_rank >= 0 || + info.standby_for_name.length()) { + out << " (standby for"; + if (info.standby_for_rank >= 0) + out << " rank " << 
info.standby_for_rank; + if (info.standby_for_name.length()) + out << " '" << info.standby_for_name << "'"; + out << ")"; + } out << "\n"; } diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index 7baf1c0081474..2991f99228b2c 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -111,6 +111,8 @@ class MDSMap { version_t state_seq; entity_addr_t addr; utime_t laggy_since; + int standby_for_rank; + string standby_for_name; mds_info_t() : rank(-1), inc(0), state(STATE_STANDBY), state_seq(0) { } @@ -127,6 +129,8 @@ class MDSMap { ::encode(state_seq, bl); ::encode(addr, bl); ::encode(laggy_since, bl); + ::encode(standby_for_rank, bl); + ::encode(standby_for_name, bl); } void decode(bufferlist::iterator& bl) { ::decode(name, bl); @@ -136,6 +140,8 @@ class MDSMap { ::decode(state_seq, bl); ::decode(addr, bl); ::decode(laggy_since, bl); + ::decode(standby_for_rank, bl); + ::decode(standby_for_name, bl); } }; WRITE_CLASS_ENCODER(mds_info_t) @@ -261,11 +267,12 @@ class MDSMap { return p->first; } - bool find_standby_for(int mds, entity_addr_t &a) { + bool find_standby_for(int mds, string& name, entity_addr_t &a) { for (map::const_iterator p = mds_info.begin(); p != mds_info.end(); p++) { - if (p->second.rank == mds && + if ((p->second.standby_for_rank == mds || + (name.length() && p->second.standby_for_name == name)) && p->second.state == MDSMap::STATE_STANDBY && !p->second.laggy()) { a = p->second.addr; @@ -276,6 +283,8 @@ class MDSMap { p != mds_info.end(); p++) { if (p->second.rank == -1 && + p->second.standby_for_rank < 0 && + p->second.standby_for_name.length() == 0 && p->second.state == MDSMap::STATE_STANDBY && !p->second.laggy()) { a = p->second.addr; diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h index 2228a469257b7..ccc2c1cd04864 100644 --- a/src/messages/MMDSBeacon.h +++ b/src/messages/MMDSBeacon.h @@ -27,13 +27,15 @@ class MMDSBeacon : public Message { epoch_t last_epoch_seen; // include last mdsmap epoch mds has seen to avoid race with monitor decree __u32 
state; version_t seq; - __s32 want_rank; + __s32 standby_for_rank; + string standby_for_name; public: MMDSBeacon() : Message(MSG_MDS_BEACON) {} - MMDSBeacon(ceph_fsid_t &f, string& n, epoch_t les, int st, version_t se, int wr) : + MMDSBeacon(ceph_fsid_t &f, string& n, epoch_t les, int st, version_t se) : Message(MSG_MDS_BEACON), - fsid(f), name(n), last_epoch_seen(les), state(st), seq(se), want_rank(wr) { } + fsid(f), name(n), last_epoch_seen(les), state(st), seq(se), + standby_for_rank(-1) { } ceph_fsid_t& get_fsid() { return fsid; } string& get_name() { return name; } @@ -41,7 +43,11 @@ class MMDSBeacon : public Message { int get_state() { return state; } version_t get_seq() { return seq; } const char *get_type_name() { return "mdsbeacon"; } - int get_want_rank() { return want_rank; } + int get_standby_for_rank() { return standby_for_rank; } + const string& get_standby_for_name() { return standby_for_name; } + + void set_standby_for_rank(int r) { standby_for_rank = r; } + void set_standby_for_name(string& n) { standby_for_name = n; } void print(ostream& out) { out << "mdsbeacon(" << name << " " << MDSMap::get_state_name(state) @@ -54,7 +60,8 @@ class MMDSBeacon : public Message { ::encode(state, payload); ::encode(seq, payload); ::encode(name, payload); - ::encode(want_rank, payload); + ::encode(standby_for_rank, payload); + ::encode(standby_for_name, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); @@ -63,7 +70,8 @@ class MMDSBeacon : public Message { ::decode(state, p); ::decode(seq, p); ::decode(name, p); - ::decode(want_rank, p); + ::decode(standby_for_rank, p); + ::decode(standby_for_name, p); } }; diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index b9b35c1ad3b13..212df5440682e 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -233,7 +233,7 @@ bool MDSMonitor::preprocess_beacon(MMDSBeacon *m) dout(15) << "mds_beacon " << *m << " noting time and replying" << dendl; last_beacon[addr] = g_clock.now(); 
mon->messenger->send_message(new MMDSBeacon(mon->monmap->fsid, m->get_name(), - mdsmap.get_epoch(), state, seq, 0), + mdsmap.get_epoch(), state, seq), m->get_orig_source_inst()); // done @@ -275,23 +275,14 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m) // boot? if (state == MDSMap::STATE_BOOT) { - int from = m->get_orig_source_inst().name.num(); - - // standby for a given rank? - int standby_for = m->get_want_rank(); - if (standby_for >= (int)pending_mdsmap.max_mds) { - dout(10) << "prepare_beacon boot: wanted standby for mds" << from - << " >= max_mds " << pending_mdsmap.max_mds - << ", will be shared standby" << dendl; - standby_for = -1; - } - // add MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr]; info.name = m->get_name(); - info.rank = standby_for; + info.rank = -1; info.addr = addr; info.state = MDSMap::STATE_STANDBY; + info.standby_for_rank = m->get_standby_for_rank(); + info.standby_for_name = m->get_standby_for_name(); // initialize the beacon timer last_beacon[addr] = g_clock.now(); @@ -506,10 +497,11 @@ void MDSMonitor::tick() while (pending_mdsmap.get_num_mds() < pending_mdsmap.get_max_mds() && !pending_mdsmap.is_degraded()) { int mds = 0; + string name; while (pending_mdsmap.is_in(mds)) mds++; entity_addr_t addr; - if (!pending_mdsmap.find_standby_for(mds, addr)) + if (!pending_mdsmap.find_standby_for(mds, name, addr)) break; dout(1) << "adding standby " << addr << " as mds" << mds << dendl; @@ -562,11 +554,11 @@ void MDSMonitor::tick() entity_addr_t sa; if (info.rank >= 0 && info.state > 0 && //|| info.state == MDSMap::STATE_STANDBY_REPLAY) && - pending_mdsmap.find_standby_for(info.rank, sa)) { + pending_mdsmap.find_standby_for(info.rank, info.name, sa)) { + MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sa]; dout(10) << " replacing " << addr << " mds" << info.rank << "." 
<< info.inc << " " << MDSMap::get_state_name(info.state) - << " with " << sa << dendl; - MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sa]; + << " with " << si.name << " " << sa << dendl; switch (info.state) { case MDSMap::STATE_CREATING: case MDSMap::STATE_STARTING: @@ -633,7 +625,8 @@ void MDSMonitor::tick() while (p != failed.end()) { int f = *p++; entity_addr_t sa; - if (pending_mdsmap.find_standby_for(f, sa)) { + string name; // FIXME + if (pending_mdsmap.find_standby_for(f, name, sa)) { dout(0) << " taking over failed mds" << f << " with " << sa << dendl; MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sa]; si.state = MDSMap::STATE_REPLAY; -- 2.39.5