From: Sage Weil Date: Sat, 5 May 2012 18:24:57 +0000 (-0700) Subject: osd: do not mark pg clean until active is durable X-Git-Tag: v0.47~24^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8ec476e526c14d229d05887241a7dff02c751e90;p=ceph.git osd: do not mark pg clean until active is durable Do not mark a PG CLEAN or set last_epoch_clean until after the PG activate is stable on all replicas. This effectively means that last_epoch_clean will never fall in an interval that follows last_epoch_started's interval. It *can* be > last_epoch_started when it falls within the same interval. Signed-off-by: Sage Weil --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index fdceb67064a0..48d8db53e53e 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -5065,7 +5065,6 @@ void OSD::do_recovery(PG *pg) recovery_wq.lock(); pg->recovery_item.remove_myself(); // sigh... recovery_wq.unlock(); - } } @@ -5073,6 +5072,8 @@ void OSD::do_recovery(PG *pg) do_queries(query_map); do_infos(info_map); + pg->write_if_dirty(*t); + if (!t->empty()) { int tr = store->queue_transaction(&pg->osr, t, new ObjectStore::C_DeleteTransaction(t), fin); assert(tr == 0); @@ -5080,7 +5081,6 @@ void OSD::do_recovery(PG *pg) delete t; delete fin; } - pg->unlock(); } pg->put(); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 5d23cdff70b6..ba97b968930d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1521,9 +1521,17 @@ void PG::all_activated_and_committed() assert(peer_activated.size() == acting.size()); info.history.last_epoch_started = get_osdmap()->get_epoch(); - share_pg_info(); - dirty_info = true; + + // make sure CLEAN is marked if we've been clean in this interval + if (info.last_complete == info.last_update && + !state_test(PG_STATE_BACKFILL) && + !state_test(PG_STATE_RECOVERING)) { + mark_clean(); + } + + share_pg_info(); + update_stats(); } void PG::queue_snap_trim() @@ -1555,30 +1563,38 @@ struct C_PG_FinishRecovery : public Context { } }; -void PG::finish_recovery(ObjectStore::Transaction& t, list& tfin) +void PG::mark_clean() { - dout(10) << "finish_recovery" << dendl; - state_clear(PG_STATE_BACKFILL); - state_clear(PG_STATE_RECOVERING); - // only mark CLEAN if we have the desired number of replicas AND we // are not remapped. if (acting.size() == get_osdmap()->get_pg_size(info.pgid) && up == acting) state_set(PG_STATE_CLEAN); - assert(info.last_complete == info.last_update); - // NOTE: this is actually a bit premature: we haven't purged the // strays yet. info.history.last_epoch_clean = get_osdmap()->get_epoch(); - share_pg_info(); - - clear_recovery_state(); trim_past_intervals(); - - write_info(t); + + dirty_info = true; +} + +void PG::finish_recovery(ObjectStore::Transaction& t, list& tfin) +{ + dout(10) << "finish_recovery" << dendl; + assert(info.last_complete == info.last_update); + + state_clear(PG_STATE_BACKFILL); + state_clear(PG_STATE_RECOVERING); + + // only mark CLEAN if last_epoch_started is already stable. + if (info.history.last_epoch_started >= info.history.same_interval_since) { + mark_clean(); + share_pg_info(); + } + + clear_recovery_state(); /* * sync all this before purging strays. but don't block! diff --git a/src/osd/PG.h b/src/osd/PG.h index 027e386dfdef..eec356775bef 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -638,6 +638,7 @@ public: bool needs_recovery() const; + void mark_clean(); ///< mark an active pg clean void generate_past_intervals(); void trim_past_intervals(); void build_prior(std::auto_ptr &prior_set);