From 23eaa6d28c3eaa5ba5c3a5f00b87a3d7694d374f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 19 Nov 2009 15:30:27 -0800 Subject: [PATCH] mds: use global_id as key in MDSMap This makes the data structures less stupid (using an entity_addr_t as a key is silly). This is a protocol and disk format change. --- src/include/ceph_fs.h | 2 +- src/mds/MDBalancer.cc | 2 +- src/mds/MDS.cc | 6 +- src/mds/MDSMap.cc | 13 ++-- src/mds/MDSMap.h | 62 ++++++++------- src/mds/mdstypes.h | 2 +- src/messages/MMDSBeacon.h | 10 ++- src/messages/MMDSLoadTargets.h | 9 ++- src/mon/MDSMonitor.cc | 133 +++++++++++++++++---------------- src/mon/MDSMonitor.h | 2 +- 10 files changed, 129 insertions(+), 112 deletions(-) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 1e96a9a87d8d4..4e5f49c738d83 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -39,7 +39,7 @@ #define CEPH_MDS_PROTOCOL 9 /* cluster internal */ #define CEPH_MON_PROTOCOL 5 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 22 /* server/client */ -#define CEPH_MDSC_PROTOCOL 29 /* server/client */ +#define CEPH_MDSC_PROTOCOL 30 /* server/client */ #define CEPH_MONC_PROTOCOL 15 /* server/client */ diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc index c545314e16e93..f7b41a4867e88 100644 --- a/src/mds/MDBalancer.cc +++ b/src/mds/MDBalancer.cc @@ -689,7 +689,7 @@ inline void MDBalancer::send_targets_message() i != my_targets.end(); ++i) targets.insert(i->first); - MMDSLoadTargets* m = new MMDSLoadTargets(targets); + MMDSLoadTargets* m = new MMDSLoadTargets(mds->monc->get_global_id(), targets); mds->monc->send_mon_message(m); } diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index f2d83c1fa0282..cd6c3ffb48b8c 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -496,7 +496,7 @@ void MDS::beacon_send() beacon_seq_stamp[beacon_last_seq] = g_clock.now(); - MMDSBeacon *beacon = new MMDSBeacon(monc->get_fsid(), name, mdsmap->get_epoch(), + MMDSBeacon *beacon = new MMDSBeacon(monc->get_fsid(), 
monc->get_global_id(), name, mdsmap->get_epoch(), want_state, beacon_last_seq); beacon->set_standby_for_rank(standby_for_rank); beacon->set_standby_for_name(standby_for_name); @@ -606,8 +606,8 @@ void MDS::handle_mds_map(MMDSMap *m) // see who i am addr = messenger->get_myaddr(); - whoami = mdsmap->get_rank(addr); - state = mdsmap->get_state(addr); + whoami = mdsmap->get_rank_gid(monc->get_global_id()); + state = mdsmap->get_state_gid(monc->get_global_id()); dout(10) << "map says i am " << addr << " mds" << whoami << " state " << ceph_mds_state_name(state) << dendl; if (state != oldstate) diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index d83222a82eee1..bf8d5f7a7e70e 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -38,18 +38,19 @@ void MDSMap::print(ostream& out) << "failed <" << failed << ">\n" << "stopped <" << stopped << ">\n"; - multimap< pair, entity_addr_t > foo; - for (map::iterator p = mds_info.begin(); + multimap< pair, __u64 > foo; + for (map<__u64,mds_info_t>::iterator p = mds_info.begin(); p != mds_info.end(); p++) - foo.insert(pair,entity_addr_t>(pair(p->second.rank, p->second.inc-1), p->first)); + foo.insert(pair,__u64>(pair(p->second.rank, p->second.inc-1), p->first)); - for (multimap< pair, entity_addr_t >::iterator p = foo.begin(); + for (multimap< pair, __u64 >::iterator p = foo.begin(); p != foo.end(); p++) { mds_info_t& info = mds_info[p->second]; - out << info.addr + out << p->second << ": " + << info.addr << " '" << info.name << "'" << " mds" << info.rank << "." 
<< info.inc @@ -78,7 +79,7 @@ void MDSMap::print(ostream& out) void MDSMap::print_summary(ostream& out) { map by_state; - for (map::iterator p = mds_info.begin(); + for (map<__u64,mds_info_t>::iterator p = mds_info.begin(); p != mds_info.end(); p++) { string s = ceph_mds_state_name(p->second.state); diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index 11e55972dea72..533ec4f65d17b 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -77,6 +77,7 @@ public: static const int STATE_STOPPING = CEPH_MDS_STATE_STOPPING; // up, exporting metadata (-> standby or out) struct mds_info_t { + __u64 global_id; string name; int32_t rank; int32_t inc; @@ -88,7 +89,7 @@ public: string standby_for_name; set export_targets; - mds_info_t() : rank(-1), inc(0), state(STATE_STANDBY), state_seq(0) { } + mds_info_t() : global_id(0), rank(-1), inc(0), state(STATE_STANDBY), state_seq(0) { } bool laggy() const { return !(laggy_since == utime_t()); } void clear_laggy() { laggy_since = utime_t(); } @@ -96,8 +97,9 @@ public: entity_inst_t get_inst() const { return entity_inst_t(entity_name_t::MDS(rank), addr); } void encode(bufferlist& bl) const { - __u8 v = 2; + __u8 v = 3; ::encode(v, bl); + ::encode(global_id, bl); ::encode(name, bl); ::encode(rank, bl); ::encode(inc, bl); @@ -112,6 +114,7 @@ public: void decode(bufferlist::iterator& bl) { __u8 v; ::decode(v, bl); + ::decode(global_id, bl); ::decode(name, bl); ::decode(rank, bl); ::decode(inc, bl); @@ -159,8 +162,8 @@ protected: set in; // currently defined cluster map inc; // most recent incarnation. 
set failed, stopped; // which roles are failed or stopped - map up; // who is in those roles - map mds_info; + map up; // who is in those roles + map<__u64,mds_info_t> mds_info; friend class MDSMonitor; @@ -199,10 +202,10 @@ public: __u32 get_cas_pg_pool() const { return cas_pg_pool; } __u32 get_metadata_pg_pool() const { return metadata_pg_pool; } - const map& get_mds_info() { return mds_info; } - const mds_info_t& get_mds_info(entity_addr_t a) { - assert(mds_info.count(a)); - return mds_info[a]; + const map<__u64,mds_info_t>& get_mds_info() { return mds_info; } + const mds_info_t& get_mds_info_gid(__u64 gid) { + assert(mds_info.count(gid)); + return mds_info[gid]; } const mds_info_t& get_mds_info(int m) { assert(up.count(m) && mds_info.count(up[m])); @@ -215,7 +218,7 @@ public: } unsigned get_num_mds(int state) { unsigned n = 0; - for (map::const_iterator p = mds_info.begin(); + for (map<__u64,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); p++) if (p->second.state == state) ++n; @@ -229,7 +232,7 @@ public: s = in; } void get_up_mds_set(set& s) { - for (map::const_iterator p = up.begin(); + for (map::const_iterator p = up.begin(); p != up.end(); p++) s.insert(p->first); @@ -249,14 +252,14 @@ public: } void get_recovery_mds_set(set& s) { s = failed; - for (map::const_iterator p = mds_info.begin(); + for (map<__u64,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); p++) if (p->second.state >= STATE_REPLAY && p->second.state <= STATE_STOPPING) s.insert(p->second.rank); } void get_mds_set(set& s, int state) { - for (map::const_iterator p = mds_info.begin(); + for (map<__u64,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); p++) if (p->second.state == state) @@ -266,13 +269,14 @@ public: int get_random_up_mds() { if (up.empty()) return -1; - map::iterator p = up.begin(); - for (int n = rand() % up.size(); n; n--) p++; + map::iterator p = up.begin(); + for (int n = rand() % up.size(); n; n--) + p++; return 
p->first; } - bool find_standby_for(int mds, string& name, entity_addr_t &a) { - for (map::const_iterator p = mds_info.begin(); + __u64 find_standby_for(int mds, string& name) { + for (map<__u64,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); p++) { if (p->second.rank == -1 && @@ -280,11 +284,10 @@ public: p->second.standby_for_name == name) && p->second.state == MDSMap::STATE_STANDBY && !p->second.laggy()) { - a = p->second.addr; - return true; + return p->first; } } - for (map::const_iterator p = mds_info.begin(); + for (map<__u64,mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); p++) { if (p->second.rank == -1 && @@ -292,11 +295,10 @@ public: p->second.standby_for_name.length() == 0 && p->second.state == MDSMap::STATE_STANDBY && !p->second.laggy()) { - a = p->second.addr; - return true; + return p->first; } } - return false; + return 0; } // mds states @@ -309,11 +311,13 @@ public: bool is_stopped(int m) { return stopped.count(m); } bool is_dne(int m) { return in.count(m) == 0; } - bool is_dne(entity_addr_t a) { return mds_info.count(a) == 0; } + bool is_dne_gid(__u64 gid) { return mds_info.count(gid) == 0; } int get_state(int m) { return up.count(m) ? mds_info[up[m]].state : 0; } - int get_state(entity_addr_t a) { return mds_info.count(a) ? mds_info[a].state : 0; } - mds_info_t& get_info(entity_addr_t a) { assert(mds_info.count(a)); return mds_info[a]; } + int get_state_gid(__u64 gid) { return mds_info.count(gid) ? 
mds_info[gid].state : 0; } + + mds_info_t& get_info(int m) { assert(up.count(m)); return mds_info[up[m]]; } + mds_info_t& get_info_gid(__u64 gid) { assert(mds_info.count(gid)); return mds_info[gid]; } bool is_boot(int m) { return get_state(m) == STATE_BOOT; } bool is_creating(int m) { return get_state(m) == STATE_CREATING; } @@ -327,7 +331,7 @@ public: bool is_stopping(int m) { return get_state(m) == STATE_STOPPING; } bool is_clientreplay_or_active_or_stopping(int m) { return is_clientreplay(m) || is_active(m) || is_stopping(m); } - bool is_laggy(entity_addr_t a) { return mds_info.count(a) && mds_info[a].laggy(); } + bool is_laggy_gid(__u64 gid) { return mds_info.count(gid) && mds_info[gid].laggy(); } // cluster states @@ -375,9 +379,9 @@ public: return false; } - int get_rank(const entity_addr_t& addr) { - if (mds_info.count(addr)) - return mds_info[addr].rank; + int get_rank_gid(__u64 gid) { + if (mds_info.count(gid)) + return mds_info[gid].rank; return -1; } diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index f08347657348e..4f574aa61045a 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -20,7 +20,7 @@ using namespace std; #include -#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v010" +#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011" //#define MDS_REF_SET // define me for improved debug output, sanity checking diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h index c7473bde74532..f516c33062a3e 100644 --- a/src/messages/MMDSBeacon.h +++ b/src/messages/MMDSBeacon.h @@ -23,6 +23,7 @@ class MMDSBeacon : public PaxosServiceMessage { ceph_fsid_t fsid; + __u64 global_id; string name; __u32 state; @@ -32,12 +33,13 @@ class MMDSBeacon : public PaxosServiceMessage { public: MMDSBeacon() : PaxosServiceMessage(MSG_MDS_BEACON, 0) {} - MMDSBeacon(const ceph_fsid_t &f, string& n, epoch_t les, int st, version_t se) : + MMDSBeacon(const ceph_fsid_t &f, __u64 g, string& n, epoch_t les, int st, version_t se) : PaxosServiceMessage(MSG_MDS_BEACON, 
les), - fsid(f), name(n), state(st), seq(se), + fsid(f), global_id(g), name(n), state(st), seq(se), standby_for_rank(-1) { } ceph_fsid_t& get_fsid() { return fsid; } + __u64 get_global_id() { return global_id; } string& get_name() { return name; } epoch_t get_last_epoch_seen() { return version; } int get_state() { return state; } @@ -50,13 +52,14 @@ class MMDSBeacon : public PaxosServiceMessage { void set_standby_for_name(string& n) { standby_for_name = n; } void print(ostream& out) { - out << "mdsbeacon(" << name << " " << ceph_mds_state_name(state) + out << "mdsbeacon(" << global_id << "/" << name << " " << ceph_mds_state_name(state) << " seq " << seq << " v" << version << ")"; } void encode_payload() { paxos_encode(); ::encode(fsid, payload); + ::encode(global_id, payload); ::encode(state, payload); ::encode(seq, payload); ::encode(name, payload); @@ -67,6 +70,7 @@ class MMDSBeacon : public PaxosServiceMessage { bufferlist::iterator p = payload.begin(); paxos_decode(p); ::decode(fsid, p); + ::decode(global_id, p); ::decode(state, p); ::decode(seq, p); ::decode(name, p); diff --git a/src/messages/MMDSLoadTargets.h b/src/messages/MMDSLoadTargets.h index 1b0466cecfa69..fffa52218647e 100644 --- a/src/messages/MMDSLoadTargets.h +++ b/src/messages/MMDSLoadTargets.h @@ -23,25 +23,28 @@ using std::map; class MMDSLoadTargets : public Message { public: + __u64 global_id; set targets; MMDSLoadTargets() : Message(MSG_MDS_OFFLOAD_TARGETS) {} - MMDSLoadTargets(set& mds_targets) : + MMDSLoadTargets(__u64 g, set& mds_targets) : Message(MSG_MDS_OFFLOAD_TARGETS), - targets(mds_targets) {} + global_id(g), targets(mds_targets) {} const char* get_type_name() { return "mds_load_targets"; } void print(ostream& o) { - o << "mds_load_targets(" << targets << ")"; + o << "mds_load_targets(" << global_id << " " << targets << ")"; } void decode_payload() { bufferlist::iterator p = payload.begin(); + ::decode(global_id, p); ::decode(targets, p); } void encode_payload() { + 
::encode(global_id, payload); ::encode(targets, payload); } }; diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index c2098533b8b08..0a45eb18ea96d 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -144,6 +144,7 @@ bool MDSMonitor::preprocess_beacon(MMDSBeacon *m) { entity_addr_t addr = m->get_orig_source_inst().addr; int state = m->get_state(); + __u64 gid = m->get_global_id(); version_t seq = m->get_seq(); MDSMap::mds_info_t info; @@ -161,7 +162,7 @@ bool MDSMonitor::preprocess_beacon(MMDSBeacon *m) return false; // booted, but not in map? - if (pending_mdsmap.is_dne(addr)) { + if (pending_mdsmap.is_dne_gid(gid)) { if (state != MDSMap::STATE_BOOT) { dout(7) << "mds_beacon " << *m << " is not in mdsmap" << dendl; mon->send_reply(m, new MMDSMap(mon->monmap->fsid, &mdsmap)); @@ -170,7 +171,7 @@ bool MDSMonitor::preprocess_beacon(MMDSBeacon *m) return false; // not booted yet. } } - info = pending_mdsmap.get_info(addr); + info = pending_mdsmap.get_info_gid(gid); // old seq? 
if (info.state_seq > seq) { @@ -219,10 +220,10 @@ bool MDSMonitor::preprocess_beacon(MMDSBeacon *m) ignore: // note time and reply dout(15) << "mds_beacon " << *m << " noting time and replying" << dendl; - last_beacon[addr].stamp = g_clock.now(); - last_beacon[addr].seq = seq; + last_beacon[gid].stamp = g_clock.now(); + last_beacon[gid].seq = seq; mon->send_reply(m, - new MMDSBeacon(mon->monmap->fsid, m->get_name(), + new MMDSBeacon(mon->monmap->fsid, m->get_global_id(), m->get_name(), mdsmap.get_epoch(), state, seq)); // done @@ -234,9 +235,9 @@ bool MDSMonitor::preprocess_beacon(MMDSBeacon *m) bool MDSMonitor::preprocess_offload_targets(MMDSLoadTargets* m) { dout(10) << "preprocess_offload_targets " << *m << " from " << m->get_orig_source() << dendl; - const entity_addr_t& a = m->get_orig_source_addr(); - if (mdsmap.mds_info.count(a) && - m->targets == mdsmap.mds_info[a].export_targets) + __u64 gid = m->global_id; + if (mdsmap.mds_info.count(gid) && + m->targets == mdsmap.mds_info[gid].export_targets) return true; return false; } @@ -272,13 +273,15 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m) // -- this is an update -- dout(12) << "prepare_beacon " << *m << " from " << m->get_orig_source_inst() << dendl; entity_addr_t addr = m->get_orig_source_inst().addr; + __u64 gid = m->get_global_id(); int state = m->get_state(); version_t seq = m->get_seq(); // boot? 
if (state == MDSMap::STATE_BOOT) { // add - MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr]; + MDSMap::mds_info_t& info = pending_mdsmap.mds_info[gid]; + info.global_id = gid; info.name = m->get_name(); info.rank = -1; info.addr = addr; @@ -288,12 +291,12 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m) info.standby_for_name = m->get_standby_for_name(); // initialize the beacon timer - last_beacon[addr].stamp = g_clock.now(); - last_beacon[addr].seq = seq; + last_beacon[gid].stamp = g_clock.now(); + last_beacon[gid].seq = seq; } else { // state change - MDSMap::mds_info_t& info = pending_mdsmap.get_info(addr); + MDSMap::mds_info_t& info = pending_mdsmap.get_info_gid(gid); if (info.laggy()) { dout(10) << "prepare_beacon clearly laggy flag on " << addr << dendl; @@ -308,8 +311,8 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m) pending_mdsmap.up.erase(info.rank); pending_mdsmap.in.erase(info.rank); pending_mdsmap.stopped.insert(info.rank); - pending_mdsmap.mds_info.erase(addr); // last! info is a ref into this map - last_beacon.erase(addr); + pending_mdsmap.mds_info.erase(gid); // last! 
info is a ref into this map + last_beacon.erase(gid); } else { info.state = state; info.state_seq = seq; @@ -326,12 +329,12 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m) bool MDSMonitor::prepare_offload_targets(MMDSLoadTargets *m) { - const entity_addr_t& a = m->get_orig_source_addr(); - if (pending_mdsmap.mds_info.count(a)) { - dout(10) << "prepare_offload_targets " << a << " " << m->targets << dendl; - pending_mdsmap.mds_info[a].export_targets = m->targets; + __u64 gid = m->global_id; + if (pending_mdsmap.mds_info.count(gid)) { + dout(10) << "prepare_offload_targets " << gid << " " << m->targets << dendl; + pending_mdsmap.mds_info[gid].export_targets = m->targets; } else { - dout(10) << "prepare_offload_targets " << a << " not in map" << dendl; + dout(10) << "prepare_offload_targets " << gid << " not in map" << dendl; } return true; } @@ -472,9 +475,9 @@ bool MDSMonitor::prepare_command(MMonCommand *m) int who = atoi(m->cmd[2].c_str()); if (mdsmap.is_active(who)) { r = 0; - entity_addr_t a = pending_mdsmap.up[who]; - ss << "telling mds" << who << " " << a << " to stop"; - pending_mdsmap.mds_info[a].state = MDSMap::STATE_STOPPING; + __u64 gid = pending_mdsmap.up[who]; + ss << "telling mds" << who << " " << pending_mdsmap.mds_info[gid].addr << " to stop"; + pending_mdsmap.mds_info[gid].state = MDSMap::STATE_STOPPING; } else { r = -EEXIST; ss << "mds" << who << " not active (" @@ -563,13 +566,13 @@ void MDSMonitor::tick() string name; while (pending_mdsmap.is_in(mds)) mds++; - entity_addr_t addr; - if (!pending_mdsmap.find_standby_for(mds, name, addr)) + __u64 newgid = pending_mdsmap.find_standby_for(mds, name); + if (!newgid) break; - dout(1) << "adding standby " << addr << " as mds" << mds << dendl; + MDSMap::mds_info_t& info = pending_mdsmap.mds_info[newgid]; + dout(1) << "adding standby " << info.addr << " as mds" << mds << dendl; - MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr]; info.rank = mds; if (pending_mdsmap.stopped.count(mds)) { info.state 
= MDSMap::STATE_STARTING; @@ -578,7 +581,7 @@ void MDSMonitor::tick() info.state = MDSMap::STATE_CREATING; info.inc = ++pending_mdsmap.inc[mds]; pending_mdsmap.in.insert(mds); - pending_mdsmap.up[mds] = addr; + pending_mdsmap.up[mds] = newgid; do_propose = true; } @@ -588,54 +591,54 @@ void MDSMonitor::tick() cutoff -= g_conf.mds_beacon_grace; // make sure last_beacon is fully populated - for (map::iterator p = pending_mdsmap.mds_info.begin(); + for (map<__u64,MDSMap::mds_info_t>::iterator p = pending_mdsmap.mds_info.begin(); p != pending_mdsmap.mds_info.end(); ++p) - if (last_beacon.count(p->second.addr) == 0) { + if (last_beacon.count(p->first) == 0) { const MDSMap::mds_info_t& info = p->second; dout(10) << " adding " << p->second.addr << " mds" << info.rank << "." << info.inc << " " << ceph_mds_state_name(info.state) << " to last_beacon" << dendl; - last_beacon[p->second.addr].stamp = g_clock.now(); - last_beacon[p->second.addr].seq = 0; + last_beacon[p->first].stamp = g_clock.now(); + last_beacon[p->first].seq = 0; } if (mon->osdmon()->paxos->is_writeable()) { bool propose_osdmap = false; - map::iterator p = last_beacon.begin(); + map<__u64, beacon_info_t>::iterator p = last_beacon.begin(); while (p != last_beacon.end()) { - entity_addr_t addr = p->first; + __u64 gid = p->first; utime_t since = p->second.stamp; __u64 seq = p->second.seq; p++; - if (pending_mdsmap.mds_info.count(addr) == 0) { + if (pending_mdsmap.mds_info.count(gid) == 0) { // clean it out - last_beacon.erase(addr); + last_beacon.erase(gid); continue; } if (since >= cutoff) continue; - MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr]; + MDSMap::mds_info_t& info = pending_mdsmap.mds_info[gid]; - dout(10) << "no beacon from " << addr << " mds" << info.rank << "." << info.inc + dout(10) << "no beacon from " << info.addr << " mds" << info.rank << "." << info.inc << " " << ceph_mds_state_name(info.state) << " since " << since << dendl; // are we in? 
// and is there a non-laggy standby that can take over for us? - entity_addr_t sa; + __u64 sgid; if (info.rank >= 0 && info.state != CEPH_MDS_STATE_STANDBY && - pending_mdsmap.find_standby_for(info.rank, info.name, sa)) { - MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sa]; - dout(10) << " replacing " << addr << " mds" << info.rank << "." << info.inc + (sgid = pending_mdsmap.find_standby_for(info.rank, info.name)) != 0) { + MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sgid]; + dout(10) << " replacing " << info.addr << " mds" << info.rank << "." << info.inc << " " << ceph_mds_state_name(info.state) - << " with " << si.name << " " << sa << dendl; + << " with " << sgid << "/" << si.name << " " << si.addr << dendl; switch (info.state) { case MDSMap::STATE_CREATING: case MDSMap::STATE_STARTING: @@ -658,36 +661,36 @@ void MDSMonitor::tick() info.state_seq = seq; if (si.state > 0) { si.inc = ++pending_mdsmap.inc[info.rank]; - pending_mdsmap.up[info.rank] = sa; + pending_mdsmap.up[info.rank] = sgid; pending_mdsmap.last_failure = pending_mdsmap.epoch; } - pending_mdsmap.mds_info.erase(addr); if (si.state > 0) { // blacklist utime_t until = now; until += g_conf.mds_blacklist_interval; - mon->osdmon()->blacklist(addr, until); + mon->osdmon()->blacklist(info.addr, until); propose_osdmap = true; } + pending_mdsmap.mds_info.erase(gid); /* last! info is a ref into this map, so erase only after info.addr has been used */ do_propose = true; } else if (info.state == MDSMap::STATE_STANDBY_REPLAY) { - dout(10) << " failing " << addr << " mds" << info.rank << "." << info.inc + dout(10) << " failing " << info.addr << " mds" << info.rank << "." << info.inc << " " << ceph_mds_state_name(info.state) << dendl; - pending_mdsmap.mds_info.erase(addr); + pending_mdsmap.mds_info.erase(gid); do_propose = true; } else if (!info.laggy()) { // just mark laggy - dout(10) << " marking " << addr << " mds" << info.rank << "." << info.inc + dout(10) << " marking " << info.addr << " mds" << info.rank << "." 
<< info.inc << " " << ceph_mds_state_name(info.state) << " laggy" << dendl; info.laggy_since = now; do_propose = true; } - last_beacon.erase(addr); + last_beacon.erase(gid); } if (propose_osdmap) @@ -703,16 +706,17 @@ void MDSMonitor::tick() set::iterator p = failed.begin(); while (p != failed.end()) { int f = *p++; - entity_addr_t sa; + __u64 sgid; string name; // FIXME - if (pending_mdsmap.find_standby_for(f, name, sa)) { - dout(0) << " taking over failed mds" << f << " with " << sa << dendl; - MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sa]; + sgid = pending_mdsmap.find_standby_for(f, name); + if (sgid) { + MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sgid]; + dout(0) << " taking over failed mds" << f << " with " << sgid << "/" << si.name << " " << si.addr << dendl; si.state = MDSMap::STATE_REPLAY; si.rank = f; si.inc = ++pending_mdsmap.inc[f]; pending_mdsmap.in.insert(f); - pending_mdsmap.up[f] = sa; + pending_mdsmap.up[f] = sgid; do_propose = true; } } @@ -724,8 +728,8 @@ void MDSMonitor::tick() pending_mdsmap.get_num_mds(MDSMap::STATE_STANDBY) >= pending_mdsmap.get_num_mds()) { // see which nodes are shadowed set shadowed; - map > avail; - for (map::iterator p = pending_mdsmap.mds_info.begin(); + map > avail; + for (map<__u64,MDSMap::mds_info_t>::iterator p = pending_mdsmap.mds_info.begin(); p != pending_mdsmap.mds_info.end(); p++) { if (p->second.state == MDSMap::STATE_STANDBY_REPLAY) @@ -743,18 +747,18 @@ void MDSMonitor::tick() continue; // already shadowed. 
if (pending_mdsmap.get_state(*p) < MDSMap::STATE_ACTIVE) continue; // only shadow active mds - entity_addr_t s; + __u64 sgid; if (avail[*p].size()) { - s = *avail[*p].begin(); + sgid = *avail[*p].begin(); avail[*p].erase(avail[*p].begin()); } else if (avail[-1].size()) { - s = *avail[-1].begin(); + sgid = *avail[-1].begin(); avail[-1].erase(avail[-1].begin()); } else continue; - dout(10) << "mds" << *p << " will be shadowed by " << s << dendl; + dout(10) << "mds" << *p << " will be shadowed by " << sgid << dendl; - MDSMap::mds_info_t& info = pending_mdsmap.mds_info[s]; + MDSMap::mds_info_t& info = pending_mdsmap.mds_info[sgid]; info.rank = *p; info.state = MDSMap::STATE_STANDBY_REPLAY; do_propose = true; @@ -778,8 +782,9 @@ void MDSMonitor::do_stop() dout(7) << "do_stop stopping active mds nodes" << dendl; print_map(mdsmap); - map::iterator p = pending_mdsmap.mds_info.begin(); + map<__u64,MDSMap::mds_info_t>::iterator p = pending_mdsmap.mds_info.begin(); while (p != pending_mdsmap.mds_info.end()) { + __u64 gid = p->first; MDSMap::mds_info_t& info = p->second; p++; switch (info.state) { @@ -791,7 +796,7 @@ void MDSMonitor::do_stop() pending_mdsmap.stopped.insert(info.rank); case MDSMap::STATE_CREATING: pending_mdsmap.up.erase(info.rank); - pending_mdsmap.mds_info.erase(info.addr); + pending_mdsmap.mds_info.erase(gid); pending_mdsmap.in.erase(info.rank); break; case MDSMap::STATE_REPLAY: @@ -802,7 +807,7 @@ void MDSMonitor::do_stop() // BUG: hrm, if this is the case, the STOPPING guys won't be able to stop, will they? 
pending_mdsmap.failed.insert(info.rank); pending_mdsmap.up.erase(info.rank); - pending_mdsmap.mds_info.erase(info.addr); + pending_mdsmap.mds_info.erase(gid); pending_mdsmap.in.erase(info.rank); break; } diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h index a07a9feb8e76c..4a14f9be200e0 100644 --- a/src/mon/MDSMonitor.h +++ b/src/mon/MDSMonitor.h @@ -90,7 +90,7 @@ class MDSMonitor : public PaxosService { utime_t stamp; __u64 seq; }; - map last_beacon; + map<__u64, beacon_info_t> last_beacon; public: MDSMonitor(Monitor *mn, Paxos *p) : PaxosService(mn, p) { } -- 2.39.5