PG *pg = _create_lock_pg(pgid, t);
pg->set_role(0);
pg->acting.swap(acting);
- pg->last_epoch_started_any =
- pg->info.history.epoch_created =
+ pg->info.history.epoch_created =
pg->info.history.last_epoch_started =
pg->info.history.same_since =
pg->info.history.same_primary_since =
pg->acting.swap(acting);
pg->set_role(role);
pg->info.history = history;
- pg->last_epoch_started_any = history.last_epoch_started; // _after_ clear_primary_state()
pg->clear_primary_state(); // yep, notably, set hml=false
pg->build_prior();
pg->write_log(t);
// ok!
dout(10) << *pg << " osd" << from << " " << *it << dendl;
+ pg->info.history.merge(it->history);
// stray?
bool acting = pg->is_acting(from);
- if (!acting && (*it).history.last_epoch_started > 0) {
+ if (!acting && (*it).last_update > eversion_t()) { // FIXME is this right?
dout(10) << *pg << " osd" << from << " has stray content: " << *it << dendl;
pg->stray_set.insert(from);
pg->state_clear(PG_STATE_CLEAN);
if (pg->is_all_uptodate())
pg->finish_recovery();
} else {
- if (it->history.last_epoch_started > pg->last_epoch_started_any)
- pg->adjust_prior();
+ pg->adjust_prior();
pg->peer(t, query_map, &info_map);
}
assert(pg);
dout(10) << *pg << " got " << info << " " << log << " " << missing << dendl;
+ pg->info.history.merge(info.history);
//m->log.print(cout);
do_queries(query_map);
} else {
- // i am REPLICA
- // merge log
- pg->merge_log(log, missing, from);
- pg->proc_missing(log, missing, from);
- assert(pg->missing.num_lost() == 0);
-
- // ok activate!
- pg->activate(t, info_map);
+ if (!pg->info.dne()) {
+ // i am REPLICA
+ // merge log
+ pg->merge_log(log, missing, from);
+ pg->proc_missing(log, missing, from);
+ assert(pg->missing.num_lost() == 0);
+
+ // ok activate!
+ pg->activate(t, info_map);
+ }
}
unsigned tr = store->apply_transaction(t);
}
}
+ pg->info.history.merge(it->second.history);
+
// ok, process query!
assert(!pg->acting.empty());
assert(from == pg->acting[0]);
// for each acting set, we need to know same_since and last_epoch
epoch_t first_epoch = info.history.same_since;
epoch_t last_epoch = first_epoch - 1;
- epoch_t stop = MAX(1, last_epoch_started_any);
+ epoch_t stop = MAX(1, info.history.last_epoch_started);
dout(10) << "build_prior considering interval " << first_epoch << " down to " << stop << dendl;
OSDMap *nextmap = new OSDMap;
{
assert(!prior_set.empty());
- // raise last_epoch_started_any
+ // raise last_epoch_started
epoch_t max = 0;
for (map<int,Info>::iterator it = peer_info.begin();
it != peer_info.end();
max = it->second.history.last_epoch_started;
}
- dout(10) << "adjust_prior last_epoch_started_any "
- << last_epoch_started_any << " -> " << max << dendl;
- assert(max > last_epoch_started_any);
- last_epoch_started_any = max;
+ dout(10) << "adjust_prior last_epoch_started "
+ << info.history.last_epoch_started << " -> " << max << dendl;
+ assert(max > info.history.last_epoch_started);
+ info.history.last_epoch_started = max;
// rebuild prior set
build_prior();
peer_missing.clear();
stat_object_temp_rd.clear();
-
- last_epoch_started_any = info.history.last_epoch_started;
}
void PG::peer(ObjectStore::Transaction& t,
// -- ok, we have all (prior_set) info. (and maybe others.)
// did we crash?
- dout(10) << " last_epoch_started_any " << last_epoch_started_any << dendl;
- if (last_epoch_started_any) {
+ dout(10) << " last_epoch_started " << info.history.last_epoch_started << dendl;
+ if (info.history.last_epoch_started) {
OSDMap omap;
- osd->get_map(last_epoch_started_any, omap);
+ osd->get_map(info.history.last_epoch_started, omap);
// start with the last active set of replicas
set<int> last_started;
last_started.insert(acting[i]);
// make sure at least one of them is still up
- for (epoch_t e = last_epoch_started_any+1;
+ for (epoch_t e = info.history.last_epoch_started+1;
e <= osd->osdmap->get_epoch();
e++) {
OSDMap omap;
if (last_started.empty()) {
if (cleanly_down) {
- dout(10) << " cleanly stopped since epoch " << last_epoch_started_any << dendl;
+ dout(10) << " cleanly stopped since epoch " << info.history.last_epoch_started << dendl;
} else {
- dout(10) << " crashed since epoch " << last_epoch_started_any << dendl;
+ dout(10) << " crashed since epoch " << info.history.last_epoch_started << dendl;
state_set(PG_STATE_CRASHED);
}
} else {
dout(10) << " still active from last started: " << last_started << dendl;
}
} else if (osd->osdmap->get_epoch() > info.history.epoch_created) { // FIXME hrm is htis right?
- dout(10) << " crashed since epoch " << last_epoch_started_any << dendl;
+ dout(10) << " crashed since epoch " << info.history.last_epoch_started << dendl;
state_set(PG_STATE_CRASHED);
}
state_clear(PG_STATE_CRASHED);
state_clear(PG_STATE_REPLAY);
}
- last_epoch_started_any = info.history.last_epoch_started = osd->osdmap->get_epoch();
+ info.history.last_epoch_started = osd->osdmap->get_epoch();
if (role == 0) { // primary state
peers_complete_thru = eversion_t(0,0); // we don't know (yet)!
bool log_backlog; // do we store a complete log?
struct History {
- epoch_t epoch_created; // epoch in which it was created
- epoch_t last_epoch_started; // last epoch started.
+ epoch_t epoch_created; // epoch in which PG was created
+ epoch_t last_epoch_started; // lower bound on last epoch started (anywhere, not necessarily locally)
epoch_t same_since; // same acting set since
epoch_t same_primary_since; // same primary at least back through this epoch.
epoch_created(0),
last_epoch_started(0),
same_since(0), same_primary_since(0), same_acker_since(0) {}
+
+ void merge(const History &other) { // fold another History into this one; merged fields only ever increase
+ if (epoch_created < other.epoch_created) // take other's epoch_created only when it is larger than ours
+ epoch_created = other.epoch_created;
+ if (last_epoch_started < other.last_epoch_started) // field is a lower bound, so keep the larger of the two
+ last_epoch_started = other.last_epoch_started;
+ } // same_since and same_primary_since are not touched by merge()
+
void encode(bufferlist &bl) const {
::encode(epoch_created, bl);
::encode(last_epoch_started, bl);
// primary state
public:
vector<int> acting;
- epoch_t last_epoch_started_any;
eversion_t last_complete_commit;
// [primary only] content recovery state
eversion_t peers_complete_thru;
bool have_master_log;
protected:
- set<int> prior_set; // current+prior OSDs, as defined by last_epoch_started_any.
+ set<int> prior_set; // current+prior OSDs, as defined by info.history.last_epoch_started.
bool must_notify_mon;
set<int> stray_set; // non-acting osds that have PG data.
set<int> uptodate_set; // current OSDs that are uptodate
bool is_all_uptodate() const { return uptodate_set.size() == acting.size(); }
void build_prior();
- void adjust_prior(); // based on new peer_info.last_epoch_started_any
+ void adjust_prior(); // based on new peer_info.last_epoch_started
bool adjust_peers_complete_thru() {
eversion_t t = info.last_complete;
info(p),
role(0),
state(0),
- last_epoch_started_any(0),
have_master_log(true),
must_notify_mon(false),
stat_num_bytes(0), stat_num_blocks(0)