pg->up = pg->acting;
pg->info.history.epoch_created =
pg->info.history.last_epoch_started =
- pg->info.history.same_since =
+ pg->info.history.same_up_since =
+ pg->info.history.same_acting_since =
pg->info.history.same_primary_since = osdmap->get_epoch();
pg->write_info(t);
* up until now
*/
void OSD::project_pg_history(pg_t pgid, PG::Info::History& h, epoch_t from,
- vector<int>& last)
+ vector<int>& lastup, vector<int>& lastacting) // up and acting sets that each older epoch's mapping is compared against
{
dout(15) << "project_pg_history " << pgid
<< " from " << from << " to " << osdmap->get_epoch()
// verify during intermediate epoch (e-1)
OSDMap *oldmap = get_map(e-1);
- vector<int> acting;
- oldmap->pg_to_acting_osds(pgid, acting);
+ vector<int> up, acting;
+ oldmap->pg_to_up_acting_osds(pgid, up, acting); // up and acting sets as mapped by the epoch e-1 map
// acting set change?
- if (acting != last &&
- e > h.same_since) {
+ if (acting != lastacting && e > h.same_acting_since) {
+ dout(15) << "project_pg_history " << pgid << " changed in " << e
+ << " from " << acting << " -> " << lastacting << dendl;
+ h.same_acting_since = e; // guarded by e > same_acting_since, so the marker only moves forward
+ }
+ // up set change?
+ if (up != lastup && e > h.same_up_since) {
dout(15) << "project_pg_history " << pgid << " changed in " << e
- << " from " << acting << " -> " << last << dendl;
- h.same_since = e;
+ << " from " << up << " -> " << lastup << dendl;
+ h.same_up_since = e; // tracked independently of the acting-set marker
}
// primary change?
- if (!(!acting.empty() && !last.empty() && acting[0] == last[0]) &&
+ if (!(!acting.empty() && !lastacting.empty() && acting[0] == lastacting[0]) && // primary is acting[0]; an empty set on either side counts as a change
e > h.same_primary_since) {
dout(15) << "project_pg_history " << pgid << " primary changed in " << e << dendl;
h.same_primary_since = e;
}
- if (h.same_since >= e &&
- h.same_primary_since >= e) break;
+ if (h.same_acting_since >= e && h.same_up_since >= e && h.same_primary_since >= e) // stop once all three markers are at or past e
+ break;
}
dout(15) << "project_pg_history end " << h << dendl;
pg->up.swap(tup);
pg->set_role(role);
- // did acting, primary|acker change?
- if (tacting != pg->acting) {
+ // did acting, up, primary|acker change?
+ if (tacting != pg->acting || tup != pg->up) {
// remember past interval
- PG::Interval& i = pg->past_intervals[pg->info.history.same_since];
+ PG::Interval& i = pg->past_intervals[pg->info.history.same_acting_since]; // NOTE(review): keyed by same_acting_since, which is not advanced when only the up set changes -- confirm a later interval cannot overwrite this entry
+ i.first = pg->info.history.same_acting_since;
+ i.last = osdmap->get_epoch() - 1; // the interval ends at the epoch before the current map
+
i.acting = oldacting;
i.up = oldup;
- i.first = pg->info.history.same_since;
- i.last = osdmap->get_epoch() - 1;
+ if (tacting != pg->acting)
+ pg->info.history.same_acting_since = osdmap->get_epoch(); // advanced only when the acting set itself differs
+ if (tup != pg->up)
+ pg->info.history.same_up_since = osdmap->get_epoch(); // advanced only when the up set itself differs
+
if (i.acting.size())
i.maybe_went_rw =
lastmap->get_up_thru(i.acting[0]) >= i.first &&
lastmap->get_up_from(i.acting[0]) <= i.first;
else
i.maybe_went_rw = 0;
- dout(10) << *pg << " noting past " << i << dendl;
- pg->info.history.same_since = osdmap->get_epoch();
- pg->dirty_info = true;
- }
- if (oldprimary != pg->get_primary()) {
- pg->info.history.same_primary_since = osdmap->get_epoch();
+ if (oldprimary != pg->get_primary())
+ pg->info.history.same_primary_since = osdmap->get_epoch(); // now recorded only inside the acting/up-changed branch; presumably a primary change always implies an acting change -- verify
+
+ dout(10) << *pg << " noting past " << i << dendl;
pg->dirty_info = true;
}
pg->cancel_recovery();
!pg->is_active()) {
// i am (inactive) primary
if (!pg->is_peering() ||
- (pg->need_up_thru && up_thru >= pg->info.history.same_since))
+ (pg->need_up_thru && up_thru >= pg->info.history.same_acting_since))
pg->peer(t, query_map, &info_map);
}
else if (pg->is_stray() &&
// figure history
PG::Info::History history;
- project_pg_history(pgid, history, created, acting);
+ project_pg_history(pgid, history, created, up, acting);
// register.
creating_pgs[pgid].created = created;
int role = osdmap->calc_pg_role(whoami, acting, acting.size());
PG::Info::History history = it->history;
- project_pg_history(pgid, history, m->get_epoch(), acting);
+ project_pg_history(pgid, history, m->get_epoch(), up, acting);
if (m->get_epoch() < history.same_primary_since) {
dout(10) << "handle_pg_notify pg " << pgid << " primary changed in "
osdmap->pg_to_up_acting_osds(info.pgid, up, acting);
int role = osdmap->calc_pg_role(whoami, acting, acting.size());
- project_pg_history(info.pgid, info.history, epoch, acting);
- if (epoch < info.history.same_since) {
+ project_pg_history(info.pgid, info.history, epoch, up, acting);
+ if (epoch < info.history.same_acting_since) {
dout(10) << "got old info " << info << " on non-existent pg, ignoring" << dendl;
return;
}
created++;
} else {
pg = _lookup_lock_pg(info.pgid);
- if (epoch < pg->info.history.same_since) {
+ if (epoch < pg->info.history.same_primary_since) {
dout(10) << *pg << " got old info " << info << ", ignoring" << dendl;
pg->unlock();
return;
dout(10) << " don't have pg " << m->pgid << dendl;
} else {
PG *pg = _lookup_lock_pg(m->pgid);
- if (m->epoch < pg->info.history.same_since) {
+ if (m->epoch < pg->info.history.same_primary_since) {
dout(10) << *pg << " got old trim to " << m->trim_to << ", ignoring" << dendl;
pg->unlock();
goto out;
// same primary?
PG::Info::History history = it->second.history;
- project_pg_history(pgid, history, m->get_epoch(), acting);
+ project_pg_history(pgid, history, m->get_epoch(), up, acting);
- if (m->get_epoch() < history.same_since) {
+ if (m->get_epoch() < history.same_primary_since) {
dout(10) << " pg " << pgid << " dne, and pg has changed in "
<< history.same_primary_since << " (msg from " << m->get_epoch() << ")" << dendl;
continue;
pg = _lookup_lock_pg(pgid);
// same primary?
- if (m->get_epoch() < pg->info.history.same_since) {
+ if (m->get_epoch() < pg->info.history.same_primary_since) {
dout(10) << *pg << " handle_pg_query primary changed in "
- << pg->info.history.same_since
+ << pg->info.history.same_primary_since
<< " (msg from " << m->get_epoch() << ")" << dendl;
pg->unlock();
continue;
}
pg = _lookup_lock_pg(pgid);
- if (pg->info.history.same_since <= m->get_epoch()) {
+ if (pg->info.history.same_acting_since <= m->get_epoch()) {
dout(10) << *pg << " removing." << dendl;
assert(pg->get_role() == -1);
assert(pg->get_primary() == m->get_source().num());
_remove_unlock_pg(pg);
} else {
dout(10) << *pg << " ignoring remove request, pg changed in epoch "
- << pg->info.history.same_since << " > " << m->get_epoch() << dendl;
+ << pg->info.history.same_acting_since << " > " << m->get_epoch() << dendl;
pg->unlock();
}
}
// same pg?
// if pg changes _at all_, we reset and repeer!
- if (op->map_epoch < pg->info.history.same_since) {
+ if (op->map_epoch < pg->info.history.same_acting_since) {
dout(10) << "handle_sub_op pg changed " << pg->info.history
<< " after " << op->map_epoch
<< ", dropping" << dendl;
{
epoch_t first_epoch = 0;
epoch_t stop = MAX(1, info.history.last_epoch_started);
- epoch_t last_epoch = info.history.same_since - 1;
+ epoch_t last_epoch = info.history.same_acting_since - 1;
dout(10) << "generate_past_intervals over epochs " << stop << "-" << last_epoch << dendl;
bool some_down = false;
// generate past intervals, if we don't have them.
- if (info.history.same_since > info.history.last_epoch_started &&
+ if (info.history.same_acting_since > info.history.last_epoch_started &&
(past_intervals.empty() ||
past_intervals.begin()->first > info.history.last_epoch_started))
generate_past_intervals();
return;
}
+ // Do I need a backlog for an up peer that is excluded from the acting set?
+ bool need_backlog = false;
+ for (unsigned i=0; i<up.size(); i++) {
+ int o = up[i];
+ if (o == osd->whoami || is_acting(o))
+ continue; // skip myself and any peer that is_acting() accepts
+ Info& pi = peer_info[o]; // NOTE(review): if peer_info is a std::map, operator[] default-inserts an Info for a peer not yet present -- confirm that is intended
+ if (pi.last_update < log.tail && !log.backlog) {
+ dout(10) << "must generate backlog for !acting peer osd" << o
+ << " whose last_update " << pi.last_update << " < my log.tail " << log.tail << dendl;
+ need_backlog = true; // no break: the remaining up peers are still checked and logged
+ }
+ }
+ if (need_backlog)
+ osd->queue_generate_backlog(this); // queued at most once, after the whole scan
+
+
/** COLLECT MISSING+LOG FROM PEERS **********/
/*
// -- do need to notify the monitor?
if (must_notify_mon) {
- if (osd->osdmap->get_up_thru(osd->whoami) < info.history.same_since) {
+ // Compare this OSD's up_thru against the epoch the acting set last changed.
+ // The log text names same_acting_since, the field actually printed.
+ if (osd->osdmap->get_up_thru(osd->whoami) < info.history.same_acting_since) {
dout(10) << "up_thru " << osd->osdmap->get_up_thru(osd->whoami)
- << " < same_since " << info.history.same_since
+ << " < same_acting_since " << info.history.same_acting_since
<< ", must notify monitor" << dendl;
need_up_thru = true;
- osd->queue_want_up_thru(info.history.same_since);
+ osd->queue_want_up_thru(info.history.same_acting_since);
return;
} else {
dout(10) << "up_thru " << osd->osdmap->get_up_thru(osd->whoami)
- << " >= same_since " << info.history.same_since
+ << " >= same_acting_since " << info.history.same_acting_since
<< ", all is well" << dendl;
}
}
{
dout(7) << "sub_op_scrub_reply" << dendl;
- if (op->map_epoch < info.history.same_primary_since) {
+ if (op->map_epoch < info.history.same_acting_since) {
dout(10) << "sub_op_scrub discarding old sub_op from "
- << op->map_epoch << " < " << info.history.same_primary_since << dendl;
+ << op->map_epoch << " < " << info.history.same_acting_since << dendl;
delete op;
return;
}
osd->map_lock.get_read();
lock();
- epoch_t epoch = info.history.same_since;
+ epoch_t epoch = info.history.same_acting_since;
if (!is_primary()) {
dout(10) << "scrub -- not primary" << dendl;
/*
lock();
- if (epoch != info.history.same_since) {
+ if (epoch != info.history.same_acting_since) {
dout(10) << "scrub pg changed, aborting" << dendl;
goto out;
}
<< " maps, waiting" << dendl;
wait();
- if (epoch != info.history.same_since ||
+ if (epoch != info.history.same_acting_since ||
osd->is_stopping()) {
dout(10) << "scrub pg changed, aborting" << dendl;
goto out;
/*
lock();
- if (epoch != info.history.same_since) {
+ if (epoch != info.history.same_acting_since) {
dout(10) << "scrub pg changed, aborting" << dendl;
goto out;
}
/*
lock();
- if (epoch != info.history.same_since) {
+ if (epoch != info.history.same_acting_since) {
dout(10) << "scrub pg changed, aborting" << dendl;
goto out;
}
epoch_t epoch_created; // epoch in which PG was created
epoch_t last_epoch_started; // lower bound on last epoch started (anywhere, not necessarily locally)
- epoch_t same_since; // same acting set since
+ epoch_t same_up_since; // same up set since
+ epoch_t same_acting_since; // same acting set since
epoch_t same_primary_since; // same primary at least back through this epoch.
History() :
epoch_created(0),
last_epoch_started(0),
- same_since(0), same_primary_since(0) {}
+ same_up_since(0), same_acting_since(0), same_primary_since(0) {} // every epoch marker starts at 0
void merge(const History &other) {
if (epoch_created < other.epoch_created)
void encode(bufferlist &bl) const {
::encode(epoch_created, bl);
::encode(last_epoch_started, bl);
- ::encode(same_since, bl);
+ ::encode(same_up_since, bl); // always written; decode() only reads it back when passed v >= 20
+ ::encode(same_acting_since, bl); // takes the position the old same_since field had
::encode(same_primary_since, bl);
}
- void decode(bufferlist::iterator &bl) {
+ // Decode a History from bl.
+ // v is the version of the enclosing encoding: same_up_since is present on the
+ // wire only when v >= 20. Older encodings carry a single "same since" epoch in
+ // that position, which is read into same_acting_since and also used as
+ // same_up_since so the field never keeps a stale value.
+ // NOTE(review): encode() always writes same_up_since, so callers that rely on
+ // the default v=0 will not consume it -- confirm every caller passes the version.
+ void decode(bufferlist::iterator &bl, int v=0) {
::decode(epoch_created, bl);
::decode(last_epoch_started, bl);
- ::decode(same_since, bl);
+ if (v >= 20)
+ ::decode(same_up_since, bl);
+ ::decode(same_acting_since, bl);
+ if (v < 20)
+ same_up_since = same_acting_since;
::decode(same_primary_since, bl);
}
} history;
bool dne() const { return history.epoch_created == 0; }
void encode(bufferlist &bl) const {
- __u8 v = CEPH_OSD_ONDISK_VERSION;
+ __u8 v = 20; // version byte written first; History::decode reads same_up_since only when handed v >= 20
::encode(v, bl);
::encode(pgid, bl);
::encode(snap_trimq, bl);
}
void decode(bufferlist::iterator &bl) {
- __u8 v = CEPH_OSD_ONDISK_VERSION;
+ __u8 v; // filled from the stream by the decode on the next line
::decode(v, bl);
::decode(pgid, bl);
::decode(log_tail, bl);
::decode(log_backlog, bl);
::decode(stats, bl);
- history.decode(bl);
+ history.decode(bl, v); // forward the stored version so History can tell whether same_up_since was encoded
::decode(snap_trimq, bl);
}
};
{
+ // Prints "ec=<created> les=<last_epoch_started> <up>/<acting>/<primary>";
+ // same_primary_since stays in the output so the primary-change epoch is still visible in logs.
return out << "ec=" << h.epoch_created
<< " les=" << h.last_epoch_started
- << " " << h.same_since << "/" << h.same_primary_since;
+ << " " << h.same_up_since << "/" << h.same_acting_since
+ << "/" << h.same_primary_since;
}
inline ostream& operator<<(ostream& out, const PG::Info& pgi)