pg_t pgid = it->first;
PG *pg = it->second;
- // get new acting set
- vector<int> tup, tacting;
- osdmap->pg_to_up_acting_osds(pgid, tup, tacting);
- int role = osdmap->calc_pg_role(whoami, tacting, tacting.size());
-
pg->lock();
-
- // adjust removed_snaps?
- if (pg->is_active() &&
- !pg->pool->newly_removed_snaps.empty()) {
- pg->snap_trimq.union_of(pg->pool->newly_removed_snaps);
- dout(10) << *pg << " snap_trimq now " << pg->snap_trimq << dendl;
- pg->dirty_info = true;
- }
-
- // no change?
- if (tacting == pg->acting && tup == pg->up) {
- if ((pg->prior_set.get() == NULL) || (!pg->prior_set_affected(osdmap))) {
- dout(15) << *pg << " unaffected with "
- << tup << "/" << tacting << " up/acting" << dendl;
- pg->unlock();
- continue;
- }
+ if (pg->handle_advance_map(*osdmap, *lastmap)) {
+ pg->unlock();
+ continue;
}
-
- // -- there was a change! --
- int oldrole = pg->get_role();
- int oldprimary = pg->get_primary();
- vector<int> oldacting = pg->acting;
- vector<int> oldup = pg->up;
-
+
// make sure we clear out any pg_temp change requests
pg_temp_wanted.erase(pgid);
-
- pg->kick();
pg->prior_set.reset(NULL);
- // update PG
- pg->acting.swap(tacting);
- pg->up.swap(tup);
- pg->set_role(role);
- // did acting, up, primary|acker change?
- if (tacting != pg->acting || tup != pg->up) {
- // remember past interval
- PG::Interval& i = pg->past_intervals[pg->info.history.same_acting_since];
- i.first = pg->info.history.same_acting_since;
- i.last = osdmap->get_epoch() - 1;
-
- i.acting = oldacting;
- i.up = oldup;
- if (tacting != pg->acting)
- pg->info.history.same_acting_since = osdmap->get_epoch();
- if (tup != pg->up)
- pg->info.history.same_up_since = osdmap->get_epoch();
-
- if (i.acting.size())
- i.maybe_went_rw =
- (lastmap->get_up_thru(i.acting[0]) >= i.first &&
- lastmap->get_up_from(i.acting[0]) <= i.first) ||
- i.first == pg->info.history.epoch_created;
- else
- i.maybe_went_rw = 0;
-
- if (oldprimary != pg->get_primary())
- pg->info.history.same_primary_since = osdmap->get_epoch();
-
- dout(10) << *pg << " noting past " << i << dendl;
- pg->dirty_info = true;
- }
pg->cancel_recovery();
- // deactivate.
- pg->state_clear(PG_STATE_ACTIVE);
- pg->state_clear(PG_STATE_DOWN);
- pg->state_clear(PG_STATE_PEERING); // we'll need to restart peering
- pg->state_clear(PG_STATE_DEGRADED);
- pg->state_clear(PG_STATE_REPLAY);
-
- if (pg->is_primary()) {
- if (osdmap->get_pg_size(pg->info.pgid) != pg->acting.size())
- pg->state_set(PG_STATE_DEGRADED);
- }
-
- // reset primary state?
- if (oldrole == 0 || pg->get_role() == 0)
- pg->clear_primary_state();
-
- dout(10) << *pg
- << " up " << oldup << " -> " << pg->up
- << ", acting " << oldacting << " -> " << pg->acting
- << ", role " << oldrole << " -> " << role << dendl;
-
- // pg->on_*
- for (unsigned i=0; i<oldacting.size(); i++)
- if (osdmap->is_down(oldacting[i]))
- pg->on_osd_failure(oldacting[i]);
- pg->on_change();
-
- if (pg->deleting) {
- dout(10) << *pg << " canceling deletion!" << dendl;
- pg->deleting = false;
- remove_wq.dequeue(pg);
- }
-
- if (role != oldrole) {
- // old primary?
- if (oldrole == 0) {
- pg->state_clear(PG_STATE_CLEAN);
- pg->clear_stats();
-
- // take replay queue waiters
- list<Message*> ls;
- for (map<eversion_t,MOSDOp*>::iterator it = pg->replay_queue.begin();
- it != pg->replay_queue.end();
- it++)
- ls.push_back(it->second);
- pg->replay_queue.clear();
- take_waiters(ls);
- }
-
- pg->on_role_change();
-
- // interrupt backlog generation
- cancel_generate_backlog(pg);
-
- // take active waiters
- take_waiters(pg->waiting_for_active);
-
- // new primary?
- if (role == 0) {
- // i am new primary
- pg->state_clear(PG_STATE_STRAY);
- } else {
- // i am now replica|stray. we need to send a notify.
- pg->state_set(PG_STATE_STRAY);
- pg->have_master_log = false;
- }
-
- } else {
- // no role change.
- // did primary change?
- if (pg->get_primary() != oldprimary) {
- // we need to announce
- pg->state_set(PG_STATE_STRAY);
-
- dout(10) << *pg << " " << oldacting << " -> " << pg->acting
- << ", acting primary "
- << oldprimary << " -> " << pg->get_primary()
- << dendl;
- } else {
- // primary is the same.
- if (role == 0) {
- // i am (still) primary. but my replica set changed.
- pg->state_clear(PG_STATE_CLEAN);
-
- dout(10) << *pg << " " << oldacting << " -> " << pg->acting
- << ", replicas changed" << dendl;
- }
- }
- }
// sanity check pg_temp
if (pg->acting.empty() && pg->up.size() && pg->up[0] == whoami) {
}
}
+bool PG::handle_advance_map(OSDMap &osdmap,
+ const OSDMap &lastmap)
+{
+ if (acting_up_affected(osdmap, lastmap)) {
+ return true;
+ } else {
+ warm_restart();
+ return false;
+ }
+}
+
+bool PG::acting_up_affected(OSDMap &osdmap,
+ const OSDMap &lastmap)
+{
+ // get new acting set
+ vector<int> tup, tacting;
+ osdmap.pg_to_up_acting_osds(info.pgid, tup, tacting);
+
+ if (acting != tacting || up != tup) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+/* Called before initializing peering during advance_map */
+void PG::warm_restart()
+{
+ OSDMap &lastmap = *osd->get_map(osd->osdmap->get_epoch() - 1);
+ OSDMap &osdmap = *osd->osdmap;
+ // -- there was a change! --
+ kick();
+
+ int oldrole = get_role();
+ int oldprimary = get_primary();
+
+ vector<int> oldacting, oldup; //About to get swapped with current (old)
+ osdmap.pg_to_up_acting_osds(info.pgid, oldup, oldacting);
+
+ // update PG
+ acting.swap(oldacting);
+ up.swap(oldup);
+
+ int role = osdmap.calc_pg_role(osd->whoami, acting, acting.size());
+ set_role(role);
+
+ // did acting, up, primary|acker change?
+ if (acting != oldacting || up != oldup) {
+ // remember past interval
+ PG::Interval& i = past_intervals[info.history.same_acting_since];
+ i.first = info.history.same_acting_since;
+ i.last = osdmap.get_epoch() - 1;
+ i.acting = oldacting;
+ i.up = oldup;
+
+ if (oldacting != acting) {
+ info.history.same_acting_since = osdmap.get_epoch();
+ }
+ if (oldup != up) {
+ info.history.same_up_since = osdmap.get_epoch();
+ }
+
+ if (i.acting.size()) {
+ i.maybe_went_rw =
+ (lastmap.get_up_thru(i.acting[0]) >= i.first &&
+ lastmap.get_up_from(i.acting[0]) <= i.first) ||
+ i.first == info.history.epoch_created;
+ } else {
+ i.maybe_went_rw = 0;
+ }
+
+ if (oldprimary != get_primary()) {
+ info.history.same_primary_since = osdmap.get_epoch();
+ }
+
+ dout(10) << *this << " noting past " << i << dendl;
+ dirty_info = true;
+ }
+
+ dout(10) << " up " << oldup << " -> " << up
+ << ", acting " << oldacting << " -> " << acting
+ << ", role " << oldrole << " -> " << role << dendl;
+
+ // deactivate.
+ state_clear(PG_STATE_ACTIVE);
+ state_clear(PG_STATE_DOWN);
+ state_clear(PG_STATE_PEERING); // we'll need to restart peering
+ state_clear(PG_STATE_DEGRADED);
+ state_clear(PG_STATE_REPLAY);
+ state_clear(PG_STATE_CRASHED);
+
+ osd->cancel_generate_backlog(this);
+
+ peer_missing.clear();
+
+ if (is_primary()) {
+ if (osdmap.get_pg_size(info.pgid) != acting.size())
+ state_set(PG_STATE_DEGRADED);
+ }
+
+ // reset primary state?
+ if (oldrole == 0 || get_role() == 0)
+ clear_primary_state();
+
+
+ // pg->on_*
+ /* TODO on_osd_failure does NOTHING! */
+#if 0
+ for (unsigned i=0; i<oldacting.size(); i++)
+ if (osdmap.is_down(oldacting[i]))
+ ->on_osd_failure(oldacting[i]);
+#endif
+
+ on_change();
+
+ if (deleting) {
+ dout(10) << *this << " canceling deletion!" << dendl;
+ deleting = false;
+ osd->remove_wq.dequeue(this);
+ }
+
+ if (role != oldrole) {
+ // old primary?
+ if (oldrole == 0) {
+ state_clear(PG_STATE_CLEAN);
+ clear_stats();
+
+ // take replay queue waiters
+ list<Message*> ls;
+ for (map<eversion_t,MOSDOp*>::iterator it = replay_queue.begin();
+ it != replay_queue.end();
+ it++)
+ ls.push_back(it->second);
+ replay_queue.clear();
+ osd->take_waiters(ls);
+ }
+
+ on_role_change();
+
+ // take active waiters
+ osd->take_waiters(waiting_for_active);
+
+ // new primary?
+ if (role == 0) {
+ // i am new primary
+ state_clear(PG_STATE_STRAY);
+ } else {
+ // i am now replica|stray. we need to send a notify.
+ state_set(PG_STATE_STRAY);
+ have_master_log = false;
+ }
+
+ } else {
+ // no role change.
+ // did primary change?
+ if (get_primary() != oldprimary) {
+ // we need to announce
+ state_set(PG_STATE_STRAY);
+
+ dout(10) << *this << " " << oldacting << " -> " << acting
+ << ", acting primary "
+ << oldprimary << " -> " << get_primary()
+ << dendl;
+ } else {
+ // primary is the same.
+ if (role == 0) {
+ // i am (still) primary. but my replica set changed.
+ state_clear(PG_STATE_CLEAN);
+
+ dout(10) << oldacting << " -> " << acting
+ << ", replicas changed" << dendl;
+ }
+ }
+ }
+ // make sure we clear out any pg_temp change requests
+ osd->pg_temp_wanted.erase(info.pgid);
+ cancel_recovery();
+
+ if (acting.empty() && up.size() && up[0] == osd->whoami) {
+ dout(10) << " acting empty, but i am up[0], clearing pg_temp" << dendl;
+ osd->queue_want_pg_temp(info.pgid, acting);
+ }
+}
+
unsigned int PG::Missing::num_missing() const
{
return missing.size();
void share_pg_info();
void share_pg_log(const eversion_t &oldver);
+ void warm_restart();
+
+ bool acting_up_affected(OSDMap &osdmap,
+ const OSDMap &lastmap);
// abstract bits
+ virtual bool handle_advance_map(OSDMap &osdmap,
+ const OSDMap &lastmap);
virtual void do_op(MOSDOp *op) = 0;
virtual void do_sub_op(MOSDSubOp *op) = 0;
virtual void do_sub_op_reply(MOSDSubOpReply *op) = 0;