if (con->get_peer_type() == CEPH_ENTITY_TYPE_MON) {
Mutex::Locker l(osd_lock);
dout(10) << "ms_handle_connect on mon" << dendl;
- if (is_booting())
- send_boot();
- send_alive();
- send_pg_temp();
- send_failures();
- send_pg_stats(ceph_clock_now(g_ceph_context));
+ if (is_booting()) {
+ start_boot();
+ } else {
+ send_alive();
+ send_pg_temp();
+ send_failures();
+ send_pg_stats(ceph_clock_now(g_ceph_context));
+ }
}
}
/* exiting with watch_lock held */
}
+// Completion context for the monitor "osdmap" version query issued at boot.
+// oldest/newest start at 0 and are forwarded to OSD::_got_boot_version()
+// once the query completes with a non-negative result.
+struct C_OSD_GetVersion : public Context {
+  OSD *osd;
+  uint64_t oldest, newest;
+
+  C_OSD_GetVersion(OSD *o)
+    : osd(o), oldest(0), newest(0)
+  {}
+
+  void finish(int r) {
+    // r < 0: skip the hand-off (presumably the query failed)
+    if (r < 0)
+      return;
+    osd->_got_boot_version(oldest, newest);
+  }
+};
+
+// Begin the boot sequence: log the map range recorded in the superblock and
+// ask the monitor client for the "osdmap" version range.  The answer arrives
+// via a C_OSD_GetVersion completion.
+void OSD::start_boot()
+{
+  dout(10) << "start_boot - have maps "
+           << superblock.oldest_map << ".." << superblock.newest_map
+           << dendl;
+
+  // the completion doubles as storage for the returned version numbers
+  // (ownership presumably passes to monc along with the pointer)
+  C_OSD_GetVersion *on_version = new C_OSD_GetVersion(this);
+  monc->get_version("osdmap", &on_version->newest, &on_version->oldest, on_version);
+}
+
+/*
+ * Decide, given the osdmap epoch range the monitor reports, whether to send
+ * our boot message now or to fetch newer maps first.
+ *
+ * @param oldest  oldest osdmap epoch reported for the monitor
+ * @param newest  newest osdmap epoch reported for the monitor
+ *
+ * Holds osd_lock for the duration of the call.
+ */
+void OSD::_got_boot_version(epoch_t oldest, epoch_t newest)
+{
+  Mutex::Locker l(osd_lock);
+  dout(10) << "_got_boot_version mon has osdmaps " << oldest << ".." << newest << dendl;
+
+  const epoch_t cur = osdmap->get_epoch();
+
+  // if our map is within recent history, try to add ourselves to the osdmap.
+  // "recent" = not older than the mon's oldest map and trailing its newest
+  // map by fewer than osd_map_message_max epochs.  (Testing
+  // cur < newest + max instead would hold for every cur <= newest and could
+  // never detect a stale map.)
+  if (cur >= oldest &&
+      cur + g_conf->osd_map_message_max > newest) {
+    send_boot();
+    return;
+  }
+
+  // get all the latest maps
+  if (cur > oldest)
+    monc->sub_want("osdmap", cur, CEPH_SUBSCRIBE_ONETIME);
+  else
+    // epoch_t is unsigned: avoid wrapping around when oldest is 0
+    monc->sub_want("osdmap", oldest > 0 ? oldest - 1 : 0, CEPH_SUBSCRIBE_ONETIME);
+  monc->renew_subs();
+}
+
void OSD::send_boot()
{
dout(10) << "send_boot" << dendl;
}
// missing some?
+ bool skip_maps = false;
if (first > osdmap->get_epoch() + 1) {
- dout(10) << "handle_osd_map message skips epoch " << osdmap->get_epoch() + 1 << dendl;
- monc->sub_want("osdmap", osdmap->get_epoch()+1, CEPH_SUBSCRIBE_ONETIME);
- monc->renew_subs();
- m->put();
- return;
+ dout(10) << "handle_osd_map message skips epochs " << osdmap->get_epoch() + 1
+ << ".." << (first-1) << dendl;
+ if (m->oldest_map && m->oldest_map <= osdmap->get_epoch()) {
+ monc->sub_want("osdmap", osdmap->get_epoch()+1, CEPH_SUBSCRIBE_ONETIME);
+ monc->renew_subs();
+ m->put();
+ return;
+ }
+ skip_maps = true;
}
if (map_in_progress_cond) {
ObjectStore::Transaction t;
// store new maps: queue for disk and put in the osdmap cache
- for (epoch_t e = osdmap->get_epoch() + 1; e <= last; e++) {
+ epoch_t start = MAX(osdmap->get_epoch() + 1, first);
+ for (epoch_t e = start; e <= last; e++) {
map<epoch_t,bufferlist>::iterator p;
p = m->maps.find(e);
if (p != m->maps.end()) {
// check for cluster snapshot
string cluster_snap;
- for (epoch_t cur = superblock.current_epoch + 1; cur <= last && cluster_snap.length() == 0; cur++) {
+ for (epoch_t cur = start; cur <= last && cluster_snap.length() == 0; cur++) {
OSDMap *newmap = get_map(cur);
cluster_snap = newmap->get_cluster_snapshot();
}
assert(osd_lock.is_locked());
- if (!superblock.oldest_map)
+ if (!superblock.oldest_map || skip_maps)
superblock.oldest_map = first;
superblock.newest_map = last;
map_lock.get_write();
// advance through the new maps
- for (epoch_t cur = superblock.current_epoch + 1; cur <= superblock.newest_map; cur++) {
+ for (epoch_t cur = start; cur <= superblock.newest_map; cur++) {
dout(10) << " advance to epoch " << cur << " (<= newest " << superblock.newest_map << ")" << dendl;
OSDMap *newmap = get_map(cur);
recovery_tp.unpause();
disk_tp.unpause();
- m->put();
+ if (m->newest_map && m->newest_map > last) {
+ dout(10) << " msg say newest map is " << m->newest_map << ", requesting more" << dendl;
+ monc->sub_want("osdmap", osdmap->get_epoch()+1, CEPH_SUBSCRIBE_ONETIME);
+ monc->renew_subs();
+ }
+ else if (is_booting()) {
+ start_boot(); // retry
+ }
+ else if (do_restart)
+ start_boot();
- if (do_restart)
- send_boot();
if (do_shutdown)
shutdown();
+ m->put();
+
if (map_in_progress_cond) {
map_in_progress = false;
dout(15) << "unlocking map_in_progress" << dendl;
}
}
- OSDMap *lastmap = get_map(osdmap->get_epoch() - 1);
+ // if we skipped a discontinuity and are the first epoch, we won't have a previous map.
+ OSDMap *lastmap = NULL;
+ if (osdmap->get_epoch() > superblock.oldest_map)
+ lastmap = get_map(osdmap->get_epoch() - 1);
// scan existing pg's
for (hash_map<pg_t,PG*>::iterator it = pg_map.begin();
}
// ok, we have at least as new a map as they do. are we (re)booting?
- if (is_booting()) {
+ if (!is_active()) {
dout(7) << "still in boot state, dropping message " << *m << dendl;
m->put();
return false;