From: Sage Weil Date: Fri, 21 Oct 2011 21:44:56 +0000 (-0700) Subject: osd: eliminate CRASHED state X-Git-Tag: v0.38~57^2~26 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cf6a9404f0873f939dacbd118ca26384c07b0661;p=ceph.git osd: eliminate CRASHED state This was an intermediate state that indicated that replay would be needed. It was poorly named, and not very useful. Instead, just set the REPLAY bit if we need replay, and then do it. No need for a separate CRASHED. Signed-off-by: Sage Weil --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 2d8a0ba0e4de..81e325df1f43 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1045,7 +1045,7 @@ void PG::build_prior(std::auto_ptr &prior_set) PriorSet &prior(*prior_set.get()); if (prior.crashed) { - state_set(PG_STATE_CRASHED); + state_set(PG_STATE_REPLAY); } if (prior.pg_down) { state_set(PG_STATE_DOWN); @@ -1327,19 +1327,20 @@ void PG::activate(ObjectStore::Transaction& t, list& tfin, map *activator_map) { assert(!is_active()); + // -- crash recovery? - if (is_crashed()) { + if (is_replay()) { if (g_conf->osd_replay_window > 0) { replay_until = ceph_clock_now(g_ceph_context); replay_until += g_conf->osd_replay_window; dout(10) << "crashed, allowing op replay for " << g_conf->osd_replay_window << " until " << replay_until << dendl; - state_set(PG_STATE_REPLAY); osd->replay_queue_lock.Lock(); osd->replay_queue.push_back(pair(info.pgid, replay_until)); osd->replay_queue_lock.Unlock(); } else { dout(10) << "crashed, but osd_replay_window=0. skipping replay." << dendl; + state_clear(PG_STATE_REPLAY); } } @@ -1348,7 +1349,6 @@ void PG::activate(ObjectStore::Transaction& t, list& tfin, state_clear(PG_STATE_STRAY); state_clear(PG_STATE_DOWN); state_clear(PG_STATE_PEERING); - state_clear(PG_STATE_CRASHED); if (is_primary() && osd->osdmap->get_pg_size(info.pgid) != acting.size()) state_set(PG_STATE_DEGRADED); @@ -1534,7 +1534,7 @@ void PG::activate(ObjectStore::Transaction& t, list& tfin, void PG::replay_queued_ops() { - assert(is_replay() && is_active() && !is_crashed()); + assert(is_replay() && is_active()); eversion_t c = info.last_update; list replay; dout(10) << "replay_queued_ops" << dendl; @@ -1556,7 +1556,7 @@ void PG::replay_queued_ops() replay_queue.clear(); osd->requeue_ops(this, replay); osd->requeue_ops(this, waiting_for_active); - state_clear(PG_STATE_REPLAY); + update_stats(); } @@ -3430,7 +3430,6 @@ void PG::start_peering_interval(const OSDMap *lastmap, state_clear(PG_STATE_PEERING); // we'll need to restart peering state_clear(PG_STATE_DEGRADED); state_clear(PG_STATE_REPLAY); - state_clear(PG_STATE_CRASHED); osd->cancel_generate_backlog(this); diff --git a/src/osd/PG.h b/src/osd/PG.h index 03937844c2c0..aee0447c3fb7 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1592,7 +1592,6 @@ public: int get_state() const { return state; } bool is_active() const { return state_test(PG_STATE_ACTIVE); } bool is_peering() const { return state_test(PG_STATE_PEERING); } - bool is_crashed() const { return state_test(PG_STATE_CRASHED); } bool is_down() const { return state_test(PG_STATE_DOWN); } bool is_replay() const { return state_test(PG_STATE_REPLAY); } bool is_clean() const { return state_test(PG_STATE_CLEAN); } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 292836e6fb3c..59f764050aa0 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -170,8 +170,6 @@ std::string pg_state_string(int state) oss << "active+"; if (state & PG_STATE_CLEAN) oss << "clean+"; - if (state & PG_STATE_CRASHED) - oss << "crashed+"; if (state & PG_STATE_DOWN) oss << "down+"; if (state & PG_STATE_REPLAY) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 06da61123116..5ea7295afd58 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -492,7 +492,6 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) { #define PG_STATE_CREATING (1<<0) // creating #define PG_STATE_ACTIVE (1<<1) // i am active. (primary: replicas too) #define PG_STATE_CLEAN (1<<2) // peers are complete, clean of stray replicas. -#define PG_STATE_CRASHED (1<<3) // all replicas went down, clients needs to replay #define PG_STATE_DOWN (1<<4) // a needed replica is down, PG offline #define PG_STATE_REPLAY (1<<5) // crashed, waiting for replay #define PG_STATE_STRAY (1<<6) // i must notify the primary i exist.