From 8b33d60fc483dae8e82fde7b17e2032a4bce8c0f Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Wed, 12 Feb 2014 10:53:13 -0800 Subject: [PATCH] PG: allow PGBackend to set criteria for PG up-ness ECBackend needs to be able to require that a readable set of the most recent interval to write be available in order to ensure that it rolls back the log far enough. Signed-off-by: Samuel Just --- src/osd/PG.cc | 28 +++++++++++++++++----------- src/osd/PG.h | 2 ++ src/osd/PGBackend.h | 10 +++++++++- src/osd/ReplicatedBackend.h | 10 ++++++++++ 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index f2d09f0690514..398c1f5574d16 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -749,6 +749,7 @@ void PG::build_prior(std::auto_ptr<PriorSet> &prior_set) prior_set.reset( new PriorSet( pool.info.ec_pool(), + get_pgbackend()->get_is_recoverable_predicate(), *get_osdmap(), past_intervals, up, @@ -1245,13 +1246,16 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id) return false; } - /* Check whether we have enough acting shards to perform reads */ - boost::scoped_ptr<PGBackend::IsReadablePredicate> readable_dec( - get_pgbackend()->get_is_readable_predicate()); + /* Check whether we have enough acting shards to later perform recovery */ + boost::scoped_ptr<PGBackend::IsRecoverablePredicate> recoverable_predicate( + get_pgbackend()->get_is_recoverable_predicate()); set<pg_shard_t> have; for (int i = 0; i < (int)want.size(); ++i) - have.insert(pg_shard_t(acting[i], i)); - if (!(*readable_dec)(have)) { + have.insert( + pg_shard_t( + want[i], + pool.info.ec_pool() ? i : ghobject_t::NO_SHARD)); + if (!(*recoverable_predicate)(have)) { want_acting.clear(); return false; } @@ -7126,13 +7130,14 @@ void PG::RecoveryState::RecoveryMachine::log_exit(const char *state_name, utime_ #define dout_prefix (*_dout << (debug_pg ? 
debug_pg->gen_prefix() : string()) << " PriorSet: ") PG::PriorSet::PriorSet(bool ec_pool, + PGBackend::IsRecoverablePredicate *c, const OSDMap &osdmap, const map<epoch_t, pg_interval_t> &past_intervals, const vector<int> &up, const vector<int> &acting, const pg_info_t &info, const PG *debug_pg) - : ec_pool(ec_pool), pg_down(false) + : ec_pool(ec_pool), pg_down(false), pcontdec(c) { /* * We have to be careful to gracefully deal with situations like @@ -7210,7 +7215,7 @@ PG::PriorSet::PriorSet(bool ec_pool, // look at candidate osds during this interval. each falls into // one of three categories: up, down (but potentially // interesting), or lost (down, but we won't wait for it). - bool any_up_now = false; // any candidates up now + set<pg_shard_t> up_now; bool any_down_now = false; // any candidates down now (that might have useful data) // consider ACTING osds @@ -7227,7 +7232,7 @@ PG::PriorSet::PriorSet(bool ec_pool, if (osdmap.is_up(o)) { // include past acting osds if they are up. probe.insert(so); - any_up_now = true; + up_now.insert(so); } else if (!pinfo) { dout(10) << "build_prior prior osd." << o << " no longer exists" << dendl; down.insert(o); @@ -7241,12 +7246,13 @@ PG::PriorSet::PriorSet(bool ec_pool, } } - // if nobody survived this interval, and we may have gone rw, + // if not enough osds survived this interval, and we may have gone rw, // then we need to wait for one of those osds to recover to // ensure that we haven't lost any information. - if (!any_up_now && any_down_now) { + if (!(*pcontdec)(up_now) && any_down_now) { // fixme: how do we identify a "clean" shutdown anyway? 
- dout(10) << "build_prior possibly went active+rw, none up; including down osds" << dendl; + dout(10) << "build_prior possibly went active+rw, insufficient up;" + << " including down osds" << dendl; for (vector<int>::const_iterator i = interval.acting.begin(); i != interval.acting.end(); ++i) { diff --git a/src/osd/PG.h b/src/osd/PG.h index 6746e010fa67b..769f55f33794b 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -350,7 +350,9 @@ public: map<int,epoch_t> blocked_by; /// current lost_at values for any OSDs in cur set for which (re)marking them lost would affect cur set bool pg_down; /// some down osds are included in @a cur; the DOWN pg state bit should be set. + boost::scoped_ptr<PGBackend::IsRecoverablePredicate> pcontdec; PriorSet(bool ec_pool, + PGBackend::IsRecoverablePredicate *c, const OSDMap &osdmap, const map<epoch_t, pg_interval_t> &past_intervals, const vector<int> &up, diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 5332530fe9bcd..eb515217ddf29 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -305,7 +305,15 @@ virtual void on_flushed() = 0; - + class IsRecoverablePredicate { + public: + /** + * have encodes the shards available + */ + virtual bool operator()(const set<pg_shard_t> &have) const = 0; + virtual ~IsRecoverablePredicate() {} + }; + virtual IsRecoverablePredicate *get_is_recoverable_predicate() = 0; void temp_colls(list<coll_t> *out) { if (temp_created) diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h index 7f37fd285ddf2..ef2153e292d19 100644 --- a/src/osd/ReplicatedBackend.h +++ b/src/osd/ReplicatedBackend.h @@ -70,6 +70,16 @@ public: void clear_state(); void on_flushed(); + class RPCRecPred : public IsRecoverablePredicate { + public: + bool operator()(const set<pg_shard_t> &have) const { + return have.size() >= 1; + } + }; + IsRecoverablePredicate *get_is_recoverable_predicate() { + return new RPCRecPred; + } + virtual void dump_recovery_info(Formatter *f) const { { f->open_array_section("pull_from_peer"); -- 2.39.5