From: Sage Weil Date: Tue, 22 Feb 2011 17:40:47 +0000 (-0800) Subject: osd: improve up_thru request behavior X-Git-Tag: v0.25~72 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=e4b8274241df201e01f2280d9f9e3857b4265f87;p=ceph.git osd: improve up_thru request behavior There is some epoch the OSD wants for up_thru, based on when the PG mapping last changed. However, once the monitor gets to the point where it must update the map, it should set up_thru to the most recent epoch the OSD has seen (i.e. the epoch it is known to be "up thru"!). This will hopefully/frequently avoid any subsequent up_thru requests. MOSDAlive already has a separate field (in PaxosServiceMessage) to hold the latest epoch; just fix the constructor to set it properly, and make the monitor use it. No protocol change, yay! Signed-off-by: Sage Weil --- diff --git a/src/messages/MOSDAlive.h b/src/messages/MOSDAlive.h index 9dcae67b34750..cac0a30ca12d5 100644 --- a/src/messages/MOSDAlive.h +++ b/src/messages/MOSDAlive.h @@ -21,9 +21,9 @@ class MOSDAlive : public PaxosServiceMessage { public: - epoch_t map_epoch; + epoch_t want; - MOSDAlive(epoch_t e) : PaxosServiceMessage(MSG_OSD_ALIVE, e), map_epoch(e) { } + MOSDAlive(epoch_t h, epoch_t w) : PaxosServiceMessage(MSG_OSD_ALIVE, h), want(w) { } MOSDAlive() : PaxosServiceMessage(MSG_OSD_ALIVE, 0) {} private: ~MOSDAlive() {} @@ -31,17 +31,17 @@ private: public: void encode_payload() { paxos_encode(); - ::encode(map_epoch, payload); + ::encode(want, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); paxos_decode(p); - ::decode(map_epoch, p); + ::decode(want, p); } const char *get_type_name() { return "osd_alive"; } void print(ostream &out) { - out << "osd_alive(" << map_epoch << " v" << version << ")"; + out << "osd_alive(want up_thru " << want << " have " << version << ")"; } }; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 11bffad36900e..44b8e9213d773 100644 --- a/src/mon/OSDMonitor.cc +++ 
b/src/mon/OSDMonitor.cc @@ -666,14 +666,14 @@ bool OSDMonitor::preprocess_alive(MOSDAlive *m) if (osdmap.is_up(from) && osdmap.get_inst(from) == m->get_orig_source_inst() && - osdmap.get_up_thru(from) >= m->map_epoch) { + osdmap.get_up_thru(from) >= m->want) { // yup. - dout(7) << "preprocess_alive e" << m->map_epoch << " dup from " << m->get_orig_source_inst() << dendl; - _reply_map(m, m->map_epoch); + dout(7) << "preprocess_alive want up_thru " << m->want << " dup from " << m->get_orig_source_inst() << dendl; + _reply_map(m, m->version); return true; } - dout(10) << "preprocess_alive e" << m->map_epoch + dout(10) << "preprocess_alive want up_thru " << m->want << " from " << m->get_orig_source_inst() << dendl; return false; @@ -690,9 +690,10 @@ bool OSDMonitor::prepare_alive(MOSDAlive *m) mon->clog.debug() << m->get_orig_source_inst() << " alive\n"; } - dout(7) << "prepare_alive e" << m->map_epoch << " from " << m->get_orig_source_inst() << dendl; - pending_inc.new_up_thru[from] = m->map_epoch; - paxos->wait_for_commit(new C_ReplyMap(this, m, m->map_epoch)); + dout(7) << "prepare_alive want up_thru " << m->want << " have " << m->version + << " from " << m->get_orig_source_inst() << dendl; + pending_inc.new_up_thru[from] = m->version; // set to the latest map the OSD has + paxos->wait_for_commit(new C_ReplyMap(this, m, m->version)); return true; } diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 5b8555ab96658..44df6581849fd 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1901,7 +1901,7 @@ void OSD::send_alive() if (up_thru_wanted > up_thru) { up_thru_pending = up_thru_wanted; dout(10) << "send_alive want " << up_thru_wanted << dendl; - monc->send_mon_message(new MOSDAlive(up_thru_wanted)); + monc->send_mon_message(new MOSDAlive(osdmap->get_epoch(), up_thru_wanted)); } }