From 103b128cad2789cb288e9fa666f2d03df13a8fdd Mon Sep 17 00:00:00 2001 From: sageweil Date: Fri, 26 Jan 2007 21:45:21 +0000 Subject: [PATCH] mds startup either to standby or to starting/creating via cmds flag. fixed mdlog inode generation. git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1040 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/cmds.cc | 13 +- branches/sage/cephmds2/cosd.cc | 2 +- branches/sage/cephmds2/mds/MDBalancer.cc | 12 +- branches/sage/cephmds2/mds/MDCache.cc | 2 +- branches/sage/cephmds2/mds/MDLog.cc | 38 +++-- branches/sage/cephmds2/mds/MDLog.h | 5 +- branches/sage/cephmds2/mds/MDS.cc | 53 ++++--- branches/sage/cephmds2/mds/MDS.h | 2 +- branches/sage/cephmds2/mds/MDSMap.h | 67 +++++++-- branches/sage/cephmds2/mds/Migrator.cc | 2 +- .../sage/cephmds2/mds/events/EImportMap.h | 3 +- branches/sage/cephmds2/mds/events/EMetaBlob.h | 63 ++++---- branches/sage/cephmds2/mds/journal.cc | 12 +- branches/sage/cephmds2/mon/MDSMonitor.cc | 136 +++++++++--------- branches/sage/cephmds2/mon/OSDMonitor.cc | 7 +- branches/sage/cephmds2/msg/Dispatcher.h | 7 - 16 files changed, 251 insertions(+), 173 deletions(-) diff --git a/branches/sage/cephmds2/cmds.cc b/branches/sage/cephmds2/cmds.cc index 5015fd21bcee5..fee59512207e9 100644 --- a/branches/sage/cephmds2/cmds.cc +++ b/branches/sage/cephmds2/cmds.cc @@ -60,6 +60,17 @@ int main(int argc, char **argv) if (g_conf.debug_after) g_timer.add_event_after(g_conf.debug_after, new C_Debug); + // mds specific args + bool standby = false; // by default, i'll start active. + for (unsigned i=0; iget_myaddr().num(), m, monmap); - mds->init(); + mds->init(standby); // wait rank.wait(); diff --git a/branches/sage/cephmds2/cosd.cc b/branches/sage/cephmds2/cosd.cc index 667e86537e4b2..c390faaca3e49 100644 --- a/branches/sage/cephmds2/cosd.cc +++ b/branches/sage/cephmds2/cosd.cc @@ -62,7 +62,7 @@ int main(int argc, char **argv) if (g_conf.debug_after) g_timer.add_event_after(g_conf.debug_after, new C_Debug); - + // osd specific args char *dev; int whoami = -1; for (unsigned i=0; iget_mds_map()->get_num_mds(); - for (int i = 0; iget_nodeid()) continue; + set up; + mds->get_mds_map()->get_up_mds_set(up); + for (set::iterator p = up.begin(); p != up.end(); ++p) { + if (*p == mds->get_nodeid()) continue; MHeartbeat *hb = new MHeartbeat(load, beat_epoch); hb->get_import_map() = import_map; mds->messenger->send_message(hb, - MSG_ADDR_MDS(i), mds->mdsmap->get_inst(i), - MDS_PORT_BALANCER, - MDS_PORT_BALANCER); + MSG_ADDR_MDS(*p), mds->mdsmap->get_inst(*p), + MDS_PORT_BALANCER, MDS_PORT_BALANCER); } } diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index 75222a0b32280..68e6e1083ee3c 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -2244,7 +2244,7 @@ int MDCache::send_dir_updates(CDir *dir, bool bcast) set who; if (bcast) { - who = mds->get_mds_map()->get_mds_set(); + mds->get_mds_map()->get_active_mds_set(who); } else { for (map::iterator p = dir->replicas_begin(); p != dir->replicas_end(); diff --git a/branches/sage/cephmds2/mds/MDLog.cc b/branches/sage/cephmds2/mds/MDLog.cc index 7708689b3f2d7..dcb5216d77eec 100644 --- a/branches/sage/cephmds2/mds/MDLog.cc +++ b/branches/sage/cephmds2/mds/MDLog.cc @@ -46,6 +46,20 @@ MDLog::MDLog(MDS *m) unflushed = 0; + journaler = 0; + logger = 0; +} + + +MDLog::~MDLog() +{ + if (journaler) { delete journaler; journaler = 0; } + if (logger) { delete logger; logger = 0; } +} + + +void MDLog::init_journaler() +{ // logger char name[80]; sprintf(name, "mds%d.log", mds->get_nodeid()); @@ -73,28 +87,24 @@ MDLog::MDLog(MDS *m) log_inode.layout.object_layout = OBJECT_LAYOUT_STARTOSD; log_inode.layout.osd = mds->get_nodeid() + 10000; // hack } - + // log streamer + if (journaler) delete journaler; journaler = new Journaler(log_inode, mds->objecter, logger); - } -MDLog::~MDLog() -{ - if (journaler) { delete journaler; journaler = 0; } - if (logger) { delete logger; logger = 0; } -} - void MDLog::reset() { + init_journaler(); journaler->reset(); } void MDLog::open(Context *c) { dout(5) << "open discovering log bounds" << endl; + init_journaler(); journaler->recover(c); } @@ -368,6 +378,10 @@ public: void MDLog::_replay() { + dout(10) << "_replay read_pos " << journaler->get_read_pos() + << " / " << journaler->get_write_pos() + << endl; + // read what's buffered while (journaler->is_readable() && journaler->get_read_pos() < journaler->get_write_pos()) { @@ -397,12 +411,20 @@ void MDLog::_replay() delete le; } + dout(10) << "_replay read_pos " << journaler->get_read_pos() + << " / " << journaler->get_write_pos() + << endl; + // wait for read? if (journaler->get_read_pos() < journaler->get_write_pos()) { journaler->wait_for_readable(new C_MDL_Replay(this)); return; } + dout(10) << "_replay read_pos " << journaler->get_read_pos() + << " / " << journaler->get_write_pos() + << endl; + // done! assert(journaler->get_read_pos() == journaler->get_write_pos()); dout(10) << "_replay - complete" << endl; diff --git a/branches/sage/cephmds2/mds/MDLog.h b/branches/sage/cephmds2/mds/MDLog.h index 8fcee8edccdaf..d15bea1fcc9d2 100644 --- a/branches/sage/cephmds2/mds/MDLog.h +++ b/branches/sage/cephmds2/mds/MDLog.h @@ -74,10 +74,13 @@ class MDLog { friend class C_MDS_WroteImportMap; friend class MDCache; + void init_journaler(); + public: MDLog(MDS *m); ~MDLog(); - + + void set_max_events(size_t max) { max_events = max; } size_t get_max_events() { return max_events; } size_t get_num_events() { return num_events + trimming.size(); } diff --git a/branches/sage/cephmds2/mds/MDS.cc b/branches/sage/cephmds2/mds/MDS.cc index 64755b36f53cd..5adf3bfe0b5b7 100644 --- a/branches/sage/cephmds2/mds/MDS.cc +++ b/branches/sage/cephmds2/mds/MDS.cc @@ -102,8 +102,7 @@ MDS::MDS(int whoami, Messenger *m, MonMap *mm) : timer(mds_lock) { req_rate = 0; - state = MDSMap::STATE_DNE; - want_state = MDSMap::STATE_STARTING; + want_state = state = MDSMap::STATE_DNE; logger = logger2 = 0; @@ -215,14 +214,18 @@ public: -int MDS::init() +int MDS::init(bool standby) { mds_lock.Lock(); + if (standby) + want_state = MDSMap::STATE_STANDBY; + else + want_state = MDSMap::STATE_STARTING; + // starting beacon. this will induce an MDSMap from the monitor - state = MDSMap::STATE_STARTING; beacon_start(); - + // schedule tick reset_tick(); @@ -297,8 +300,9 @@ void MDS::tick() void MDS::beacon_start() { - beacon_send(); // send first beacon - reset_beacon_killer(); // schedule killer + beacon_send(); // send first beacon + + //reset_beacon_killer(); // schedule killer } @@ -315,7 +319,9 @@ void MDS::beacon_send() { ++beacon_last_seq; dout(10) << "beacon_send " << MDSMap::get_state_name(want_state) - << " seq " << beacon_last_seq << endl; + << " seq " << beacon_last_seq + << " (currently " << MDSMap::get_state_name(state) << ")" + << endl; beacon_seq_stamp[beacon_last_seq] = g_clock.now(); @@ -405,16 +411,18 @@ void MDS::handle_mds_map(MMDSMap *m) if (oldwhoami != whoami) { messenger->reset_myaddr(MSG_ADDR_MDS(whoami)); reopen_log(); + + mdlog->reset(); } // update my state int oldstate = state; state = mdsmap->get_state(whoami); - if (oldstate == MDSMap::STATE_DNE && state == MDSMap::STATE_CREATING) { - // special case at startup (monitor decides whether i am creating or starting) - assert(want_state == MDSMap::STATE_STARTING); - want_state = MDSMap::STATE_CREATING; + // did the monitor order me active? + if ((oldstate == MDSMap::STATE_DNE || oldstate == MDSMap::STATE_STANDBY) && + (state == MDSMap::STATE_CREATING || state == MDSMap::STATE_STARTING)) { + want_state = state; } dout(1) << "handle_mds_map i am mds" << whoami << " with state " << mdsmap->get_state_name(state) << endl; @@ -476,8 +484,8 @@ void MDS::handle_osd_map(MOSDMap *m) boot_recover(); else if (is_creating()) boot_create(); - else - assert(0); + else + assert(is_standby()); } // pass on to clients @@ -631,8 +639,10 @@ int MDS::shutdown_start() derr(0) << "mds shutdown start" << endl; // tell everyone to stop. - for (set::iterator p = mdsmap->get_mds_set().begin(); - p != mdsmap->get_mds_set().end(); + set active; + mdsmap->get_active_mds_set(active); + for (set::iterator p = active.begin(); + p != active.end(); p++) { if (mdsmap->is_starting(*p) || mdsmap->is_active(*p)) { dout(1) << "sending MShutdownStart to mds" << *p << endl; @@ -752,17 +762,6 @@ void MDS::my_dispatch(Message *m) // HACK FOR NOW - /* - static bool did_heartbeat_hack = false; - if (!shutting_down && !shut_down && - false && - !did_heartbeat_hack) { - osdmonitor->initiate_heartbeat(); - did_heartbeat_hack = true; - } - */ - - if (is_active()) { // flush log to disk after every op. for now. mdlog->flush(); diff --git a/branches/sage/cephmds2/mds/MDS.h b/branches/sage/cephmds2/mds/MDS.h index 6dd8514fd5826..ee3f60143fd61 100644 --- a/branches/sage/cephmds2/mds/MDS.h +++ b/branches/sage/cephmds2/mds/MDS.h @@ -204,7 +204,7 @@ public: void send_message_mds(Message *m, int mds, int port=0, int fromport=0); // start up, shutdown - int init(); + int init(bool standby=false); void reopen_log(); void boot_create(); // i am new mds. diff --git a/branches/sage/cephmds2/mds/MDSMap.h b/branches/sage/cephmds2/mds/MDSMap.h index f3097abad4112..47daa9400a594 100644 --- a/branches/sage/cephmds2/mds/MDSMap.h +++ b/branches/sage/cephmds2/mds/MDSMap.h @@ -31,7 +31,7 @@ class MDSMap { static const int STATE_OUT = 1; // down, once existed, but no imports. static const int STATE_FAILED = 2; // down, holds (er, held) metadata; needs to be recovered. - static const int STATE_STANDBY = 3; // up, but inactive; waiting for someone to fail. + static const int STATE_STANDBY = 3; // up, but inactive. waiting for assignment by monitor. static const int STATE_CREATING = 4; // up, creating MDS instance (initializing journal, etc.) static const int STATE_STARTING = 5; // up, scanning journal, recoverying any shared state static const int STATE_ACTIVE = 6; // up, active @@ -62,10 +62,10 @@ class MDSMap { int anchortable; // which MDS has anchortable (fixme someday) int root; // which MDS has root directory - set mds_set; // set of MDSs - map mds_state; // MDS state + set mds_created; // which mds ids have initialized journals and id tables. + map mds_state; // MDS state map mds_state_seq; - map mds_inst; // up instances + map mds_inst; // up instances friend class MDSMonitor; @@ -80,26 +80,59 @@ class MDSMap { int get_anchortable() const { return anchortable; } int get_root() const { return root; } - int get_num_mds() const { return mds_set.size(); } + // counts + int get_num_mds() const { return mds_state.size(); } int get_num_up_mds() { int n = 0; - for (set::const_iterator p = mds_set.begin(); - p != mds_set.end(); + for (map::const_iterator p = mds_state.begin(); + p != mds_state.end(); p++) - if (is_up(*p)) ++n; + if (is_up(p->first)) ++n; return n; } int get_num_up_or_failed_mds() { int n = 0; - for (set::const_iterator p = mds_set.begin(); - p != mds_set.end(); + for (map::const_iterator p = mds_state.begin(); + p != mds_state.end(); p++) - if (is_up(*p) || is_failed(*p)) ++n; + if (is_up(p->first) || is_failed(p->first)) + ++n; return n; } - const set& get_mds_set() const { return mds_set; } + // sets + void get_mds_set(set& s) { + s.clear(); + for (map::const_iterator p = mds_state.begin(); + p != mds_state.end(); + p++) + s.insert(p->first); + } + void get_up_mds_set(set& s) { + s.clear(); + for (map::const_iterator p = mds_state.begin(); + p != mds_state.end(); + p++) + if (is_up(p->first)) + s.insert(p->first); + } + void get_mds_set(set& s, int state) { + s.clear(); + for (map::const_iterator p = mds_state.begin(); + p != mds_state.end(); + p++) + if (p->second == state) + s.insert(p->first); + } + void get_active_mds_set(set& s) { + get_mds_set(s, MDSMap::STATE_ACTIVE); + } + void get_failed_mds_set(set& s) { + get_mds_set(s, MDSMap::STATE_FAILED); + } + + // state bool is_down(int m) { return is_dne(m) || is_out(m) || is_failed(m); } bool is_up(int m) { return !is_down(m); } @@ -119,6 +152,10 @@ class MDSMap { return STATE_OUT; } + // inst + bool have_inst(int m) { + return mds_inst.count(m); + } const entity_inst_t& get_inst(int m) { assert(mds_inst.count(m)); return mds_inst[m]; @@ -141,9 +178,9 @@ class MDSMap { } void remove_mds(int m) { - mds_set.erase(m); mds_inst.erase(m); mds_state.erase(m); + mds_state_seq.erase(m); } @@ -154,8 +191,8 @@ class MDSMap { blist.append((char*)&anchortable, sizeof(anchortable)); blist.append((char*)&root, sizeof(root)); - _encode(mds_set, blist); _encode(mds_state, blist); + _encode(mds_state_seq, blist); _encode(mds_inst, blist); } @@ -170,8 +207,8 @@ class MDSMap { blist.copy(off, sizeof(root), (char*)&root); off += sizeof(root); - _decode(mds_set, blist, off); _decode(mds_state, blist, off); + _decode(mds_state_seq, blist, off); _decode(mds_inst, blist, off); } diff --git a/branches/sage/cephmds2/mds/Migrator.cc b/branches/sage/cephmds2/mds/Migrator.cc index e110cf44a3b94..143c6ee77397c 100644 --- a/branches/sage/cephmds2/mds/Migrator.cc +++ b/branches/sage/cephmds2/mds/Migrator.cc @@ -3195,7 +3195,7 @@ void Migrator::handle_unhash_dir(MUnhashDir *m) } // init gather set - hash_gather[dir] = mds->get_mds_map()->get_mds_set(); // fixme + mds->get_mds_map()->get_active_mds_set( hash_gather[dir] ); hash_gather[dir].erase(mds->get_nodeid()); // send unhash message diff --git a/branches/sage/cephmds2/mds/events/EImportMap.h b/branches/sage/cephmds2/mds/events/EImportMap.h index 41e1fec300237..50f366faaa9fa 100644 --- a/branches/sage/cephmds2/mds/events/EImportMap.h +++ b/branches/sage/cephmds2/mds/events/EImportMap.h @@ -29,7 +29,8 @@ public: void print(ostream& out) { out << "import_map " << imports.size() << " imports, " - << exports.size() << " exports"; + << exports.size() << " exports" + << " " << metablob; } void encode_payload(bufferlist& bl) { diff --git a/branches/sage/cephmds2/mds/events/EMetaBlob.h b/branches/sage/cephmds2/mds/events/EMetaBlob.h index e97fccd5a91a1..800c6674c91a8 100644 --- a/branches/sage/cephmds2/mds/events/EMetaBlob.h +++ b/branches/sage/cephmds2/mds/events/EMetaBlob.h @@ -138,11 +138,15 @@ class EMetaBlob { int state; int nfull, nremote, nnull; bufferlist bfull, bremote, bnull; + + private: + bool dn_decoded; list dfull; list dremote; list dnull; - - dirlump() : state(0), nfull(0), nremote(0), nnull(0) { } + + public: + dirlump() : state(0), nfull(0), nremote(0), nnull(0), dn_decoded(true) { } bool is_import() { return state & STATE_IMPORT; } void mark_import() { state |= STATE_IMPORT; } @@ -151,6 +155,10 @@ class EMetaBlob { bool is_dirty() { return state & STATE_DIRTY; } void mark_dirty() { state |= STATE_DIRTY; } + list &get_dfull() { return dfull; } + list &get_dremote() { return dremote; } + list &get_dnull() { return dnull; } + void _encode_bits() { for (list::iterator p = dfull.begin(); p != dfull.end(); ++p) p->_encode(bfull); @@ -160,6 +168,7 @@ class EMetaBlob { p->_encode(bnull); } void _decode_bits() { + if (dn_decoded) return; int off = 0; for (int i=0; iis_remote()) { lump.nremote++; if (dirty) - lump.dremote.push_front(remotebit(dn->get_name(), - dn->get_projected_version(), - dn->get_remote_ino(), - dirty)); + lump.get_dremote().push_front(remotebit(dn->get_name(), + dn->get_projected_version(), + dn->get_remote_ino(), + dirty)); else - lump.dremote.push_back(remotebit(dn->get_name(), - dn->get_projected_version(), - dn->get_remote_ino(), - dirty)); + lump.get_dremote().push_back(remotebit(dn->get_name(), + dn->get_projected_version(), + dn->get_remote_ino(), + dirty)); } else if (!in) { lump.nnull++; if (dirty) - lump.dnull.push_front(nullbit(dn->get_name(), - dn->get_projected_version(), - dirty)); + lump.get_dnull().push_front(nullbit(dn->get_name(), + dn->get_projected_version(), + dirty)); else - lump.dnull.push_back(nullbit(dn->get_name(), - dn->get_projected_version(), - dirty)); + lump.get_dnull().push_back(nullbit(dn->get_name(), + dn->get_projected_version(), + dirty)); } else { lump.nfull++; if (dirty) { - lump.dfull.push_front(fullbit(dn->get_name(), - dn->get_projected_version(), - in->inode, in->symlink, - dirty)); - return &lump.dfull.front().inode; + lump.get_dfull().push_front(fullbit(dn->get_name(), + dn->get_projected_version(), + in->inode, in->symlink, + dirty)); + return &lump.get_dfull().front().inode; } else { - lump.dfull.push_back(fullbit(dn->get_name(), - dn->get_projected_version(), - in->inode, in->symlink, - dirty)); - return &lump.dfull.back().inode; + lump.get_dfull().push_back(fullbit(dn->get_name(), + dn->get_projected_version(), + in->inode, in->symlink, + dirty)); + return &lump.get_dfull().back().inode; } } return 0; diff --git a/branches/sage/cephmds2/mds/journal.cc b/branches/sage/cephmds2/mds/journal.cc index eab1a07e201db..82e6990fbf721 100644 --- a/branches/sage/cephmds2/mds/journal.cc +++ b/branches/sage/cephmds2/mds/journal.cc @@ -185,8 +185,8 @@ void EMetaBlob::replay(MDS *mds) lump._decode_bits(); // full dentry+inode pairs - for (list::iterator p = lump.dfull.begin(); - p != lump.dfull.end(); + for (list::iterator p = lump.get_dfull().begin(); + p != lump.get_dfull().end(); p++) { CInode *in = mds->mdcache->get_inode(p->inode.ino); if (!in) { @@ -213,8 +213,8 @@ void EMetaBlob::replay(MDS *mds) } // remote dentries - for (list::iterator p = lump.dremote.begin(); - p != lump.dremote.end(); + for (list::iterator p = lump.get_dremote().begin(); + p != lump.get_dremote().end(); p++) { CDentry *dn = dir->lookup(p->dn); if (!dn) { @@ -232,8 +232,8 @@ void EMetaBlob::replay(MDS *mds) } // null dentries - for (list::iterator p = lump.dnull.begin(); - p != lump.dnull.end(); + for (list::iterator p = lump.get_dnull().begin(); + p != lump.get_dnull().end(); p++) { CDentry *dn = dir->lookup(p->dn); if (!dn) { diff --git a/branches/sage/cephmds2/mon/MDSMonitor.cc b/branches/sage/cephmds2/mon/MDSMonitor.cc index 9eced4e66c697..8b3028d962866 100644 --- a/branches/sage/cephmds2/mon/MDSMonitor.cc +++ b/branches/sage/cephmds2/mon/MDSMonitor.cc @@ -35,13 +35,6 @@ void MDSMonitor::create_initial() mdsmap.epoch = 0; // until everyone boots mdsmap.ctime = g_clock.now(); - /* - for (int i=0; i::iterator p = mdsmap.get_mds_set().begin(); - p != mdsmap.get_mds_set().end(); + set all; + mdsmap.get_mds_set(all); + for (set::iterator p = all.begin(); + p != all.end(); ++p) { dout(7) << " mds" << *p << " : " << MDSMap::get_state_name(mdsmap.get_state(*p)) - << " : " << (mdsmap.mds_inst.count(*p) ? mdsmap.get_inst(*p) : blank) + << " : " << (mdsmap.have_inst(*p) ? mdsmap.get_inst(*p) : blank) << endl; } } @@ -89,58 +84,49 @@ void MDSMonitor::handle_mds_beacon(MMDSBeacon *m) version_t seq = m->get_seq(); // initial boot? - if (state == MDSMap::STATE_STARTING) { - bool booted = false; - - // choose an MDS id - if (from >= 0) { - // wants to be a specific MDS. - if (mdsmap.is_down(from) || - mdsmap.get_inst(from) == m->get_source_inst()) { - // fine, whatever. - dout(10) << "mds_beacon assigning requested mds" << from << endl; - booted = true; - } else { - dout(10) << "mds_beacon not assigning requested mds" << from - << ", that mds is up and someone else" << endl; - from = -1; - } + bool booted = false; + + // choose an MDS id + if (from >= 0) { + // wants to be a specific MDS. + if (mdsmap.is_down(from) || + mdsmap.get_inst(from) == m->get_source_inst()) { + // fine, whatever. + //dout(10) << "mds_beacon assigning requested mds" << from << endl; + booted = true; + } else { + dout(10) << "mds_beacon not assigning requested mds" << from + << ", that mds is up and someone else" << endl; + from = -1; } - if (from < 0) { - // pick a failed mds? - for (set::iterator p = mdsmap.mds_set.begin(); - p != mdsmap.mds_set.end(); - ++p) { - if (mdsmap.is_failed(*p)) { - from = *p; - dout(10) << "mds_beacon assigned failed mds" << from << endl; - booted = true; - break; - } - } + } + if (from < 0) { + // pick a failed mds? + set failed; + mdsmap.get_failed_mds_set(failed); + if (!failed.empty()) { + from = *failed.begin(); + dout(10) << "mds_beacon assigned failed mds" << from << endl; + booted = true; } - if (from < 0) { - // ok, just pick any unused mds id. - for (from=0; ; ++from) { - if (mdsmap.is_dne(from) || - mdsmap.is_out(from)) { - dout(10) << "mds_beacon assigned out|dne mds" << from << endl; - booted = true; - break; - } + } + if (from < 0) { + // ok, just pick any unused mds id. + for (from=0; ; ++from) { + if (mdsmap.is_dne(from) || + mdsmap.is_out(from)) { + dout(10) << "mds_beacon assigned out|dne mds" << from << endl; + booted = true; + break; } } - - // make sure it's in the map - if (booted) { - mdsmap.mds_set.insert(from); - mdsmap.mds_inst[from] = m->get_source_inst(); - } - - if (!mdsmap.mds_state.count(from) || - mdsmap.mds_state[from] == MDSMap::STATE_CREATING) - state = MDSMap::STATE_CREATING; // mds may not know it needs to create } + + // make sure it's in the map + if (booted) { + mdsmap.mds_inst[from] = m->get_source_inst(); + } + // bad beacon? if (mdsmap.is_up(from) && @@ -157,6 +143,19 @@ void MDSMonitor::handle_mds_beacon(MMDSBeacon *m) } + // starting -> creating weirdness. + if (mdsmap.mds_created.count(from) == 0) { + // mds may not know it needs to create + if (state == MDSMap::STATE_STARTING) + state = MDSMap::STATE_CREATING; + + // mds may have finished creating. + if (state == MDSMap::STATE_ACTIVE && + mdsmap.mds_state[from] == MDSMap::STATE_CREATING) + mdsmap.mds_created.insert(from); + } + + // reply to beacon? if (state != MDSMap::STATE_OUT) { last_beacon[from] = g_clock.now(); // note time @@ -164,7 +163,8 @@ void MDSMonitor::handle_mds_beacon(MMDSBeacon *m) } // did we update the map? - if (mdsmap.mds_state[from] != state) { + if (mdsmap.mds_state.count(from) == 0 || + mdsmap.mds_state[from] != state) { // update mds state dout(10) << "mds_beacon mds" << from << " " << MDSMap::get_state_name(mdsmap.mds_state[from]) << " -> " << MDSMap::get_state_name(state) @@ -200,12 +200,12 @@ void MDSMonitor::bcast_latest_mds() dout(10) << "bcast_latest_mds " << mdsmap.get_epoch() << endl; // tell mds - for (set::iterator p = mdsmap.get_mds_set().begin(); - p != mdsmap.get_mds_set().end(); - p++) { - if (mdsmap.is_down(*p)) continue; + set up; + mdsmap.get_up_mds_set(up); + for (set::iterator p = up.begin(); + p != up.end(); + p++) send_full(MSG_ADDR_MDS(*p), mdsmap.get_inst(*p)); - } } void MDSMonitor::send_full(msg_addr_t dest, const entity_inst_t& inst) @@ -244,12 +244,12 @@ void MDSMonitor::tick() bool changed = false; - for (set::iterator p = mdsmap.get_mds_set().begin(); - p != mdsmap.get_mds_set().end(); + set up; + mdsmap.get_up_mds_set(up); + + for (set::iterator p = up.begin(); + p != up.end(); ++p) { - if (!mdsmap.is_up(*p)) - continue; - if (last_beacon.count(*p)) { if (last_beacon[*p] < cutoff) { int newstate = MDSMap::STATE_FAILED; diff --git a/branches/sage/cephmds2/mon/OSDMonitor.cc b/branches/sage/cephmds2/mon/OSDMonitor.cc index bc68a5a9876b0..8e97b54a6da76 100644 --- a/branches/sage/cephmds2/mon/OSDMonitor.cc +++ b/branches/sage/cephmds2/mon/OSDMonitor.cc @@ -563,10 +563,11 @@ void OSDMonitor::bcast_latest_mds() dout(1) << "bcast_latest_mds epoch " << e << endl; // tell mds - for (set::iterator i = mon->mdsmon->mdsmap.get_mds_set().begin(); - i != mon->mdsmon->mdsmap.get_mds_set().end(); + set up; + mon->mdsmon->mdsmap.get_up_mds_set(up); + for (set::iterator i = up.begin(); + i != up.end(); i++) { - if (mon->mdsmon->mdsmap.is_out(*i) || mon->mdsmon->mdsmap.is_down(*i)) continue; send_incremental(osdmap.get_epoch()-1, MSG_ADDR_MDS(*i), mon->mdsmon->mdsmap.get_inst(*i)); } } diff --git a/branches/sage/cephmds2/msg/Dispatcher.h b/branches/sage/cephmds2/msg/Dispatcher.h index e6fe8d8da47ce..4da1f3d3835c6 100644 --- a/branches/sage/cephmds2/msg/Dispatcher.h +++ b/branches/sage/cephmds2/msg/Dispatcher.h @@ -26,15 +26,8 @@ class Dispatcher { // how i receive messages virtual void dispatch(Message *m) = 0; - // how i deal with transmission failures. virtual void ms_handle_failure(Message *m, msg_addr_t dest, const entity_inst_t& inst) { delete m; } - - // lookups - virtual bool ms_lookup(msg_addr_t dest, entity_inst_t& inst) { assert(0); return 0; } - - // this is how i send messages - //int send_message(Message *m, msg_addr_t dest, int dest_port); }; #endif -- 2.39.5