git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
PG: allow PGBackend to set criteria for PG up-ness
author Samuel Just <sam.just@inktank.com>
Wed, 12 Feb 2014 18:53:13 +0000 (10:53 -0800)
committer Samuel Just <sam.just@inktank.com>
Tue, 18 Feb 2014 04:12:14 +0000 (20:12 -0800)
ECBackend needs to be able to require that a readable
set of the most recent interval to write be available
in order to ensure that it rolls back the log far
enough.

Signed-off-by: Samuel Just <sam.just@inktank.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/PGBackend.h
src/osd/ReplicatedBackend.h

index f2d09f069051483a1d32946ad4bd2c6e7e78cef5..398c1f5574d1684e2b18bc2cac165fb475b40100 100644 (file)
@@ -749,6 +749,7 @@ void PG::build_prior(std::auto_ptr<PriorSet> &prior_set)
   prior_set.reset(
     new PriorSet(
       pool.info.ec_pool(),
+      get_pgbackend()->get_is_recoverable_predicate(),
       *get_osdmap(),
       past_intervals,
       up,
@@ -1245,13 +1246,16 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id)
     return false;
   }
 
-  /* Check whether we have enough acting shards to perform reads */
-  boost::scoped_ptr<PGBackend::IsReadablePredicate> readable_dec(
-    get_pgbackend()->get_is_readable_predicate());
+  /* Check whether we have enough acting shards to later perform recovery */
+  boost::scoped_ptr<PGBackend::IsRecoverablePredicate> recoverable_predicate(
+    get_pgbackend()->get_is_recoverable_predicate());
   set<pg_shard_t> have;
   for (int i = 0; i < (int)want.size(); ++i)
-    have.insert(pg_shard_t(acting[i], i));
-  if (!(*readable_dec)(have)) {
+    have.insert(
+      pg_shard_t(
+       want[i],
+       pool.info.ec_pool() ? i : ghobject_t::NO_SHARD));
+  if (!(*recoverable_predicate)(have)) {
     want_acting.clear();
     return false;
   }
@@ -7126,13 +7130,14 @@ void PG::RecoveryState::RecoveryMachine::log_exit(const char *state_name, utime_
 #define dout_prefix (*_dout << (debug_pg ? debug_pg->gen_prefix() : string()) << " PriorSet: ")
 
 PG::PriorSet::PriorSet(bool ec_pool,
+                      PGBackend::IsRecoverablePredicate *c,
                       const OSDMap &osdmap,
                       const map<epoch_t, pg_interval_t> &past_intervals,
                       const vector<int> &up,
                       const vector<int> &acting,
                       const pg_info_t &info,
                       const PG *debug_pg)
-  : ec_pool(ec_pool), pg_down(false)
+  : ec_pool(ec_pool), pg_down(false), pcontdec(c)
 {
   /*
    * We have to be careful to gracefully deal with situations like
@@ -7210,7 +7215,7 @@ PG::PriorSet::PriorSet(bool ec_pool,
     // look at candidate osds during this interval.  each falls into
     // one of three categories: up, down (but potentially
     // interesting), or lost (down, but we won't wait for it).
-    bool any_up_now = false;    // any candidates up now
+    set<pg_shard_t> up_now;
     bool any_down_now = false;  // any candidates down now (that might have useful data)
 
     // consider ACTING osds
@@ -7227,7 +7232,7 @@ PG::PriorSet::PriorSet(bool ec_pool,
       if (osdmap.is_up(o)) {
        // include past acting osds if they are up.
        probe.insert(so);
-       any_up_now = true;
+       up_now.insert(so);
       } else if (!pinfo) {
        dout(10) << "build_prior  prior osd." << o << " no longer exists" << dendl;
        down.insert(o);
@@ -7241,12 +7246,13 @@ PG::PriorSet::PriorSet(bool ec_pool,
       }
     }
 
-    // if nobody survived this interval, and we may have gone rw,
+    // if not enough osds survived this interval, and we may have gone rw,
     // then we need to wait for one of those osds to recover to
     // ensure that we haven't lost any information.
-    if (!any_up_now && any_down_now) {
+    if (!(*pcontdec)(up_now) && any_down_now) {
       // fixme: how do we identify a "clean" shutdown anyway?
-      dout(10) << "build_prior  possibly went active+rw, none up; including down osds" << dendl;
+      dout(10) << "build_prior  possibly went active+rw, insufficient up;"
+              << " including down osds" << dendl;
       for (vector<int>::const_iterator i = interval.acting.begin();
           i != interval.acting.end();
           ++i) {
index 6746e010fa67bd4184257cffca71fd7c215e9e00..769f55f33794b6c2963058f735056fa79d34ae94 100644 (file)
@@ -350,7 +350,9 @@ public:
     map<int, epoch_t> blocked_by;  /// current lost_at values for any OSDs in cur set for which (re)marking them lost would affect cur set
 
     bool pg_down;   /// some down osds are included in @a cur; the DOWN pg state bit should be set.
+    boost::scoped_ptr<PGBackend::IsRecoverablePredicate> pcontdec;
     PriorSet(bool ec_pool,
+            PGBackend::IsRecoverablePredicate *c,
             const OSDMap &osdmap,
             const map<epoch_t, pg_interval_t> &past_intervals,
             const vector<int> &up,
index 5332530fe9bcdd35bc432855fa7116d901f2ea44..eb515217ddf292e18d978575a9d69db02cbc1036 100644 (file)
 
    virtual void on_flushed() = 0;
 
-
+   class IsRecoverablePredicate {
+   public:
+     /**
+      * have encodes the shards available
+      */
+     virtual bool operator()(const set<pg_shard_t> &have) const = 0;
+     virtual ~IsRecoverablePredicate() {}
+   };
+   virtual IsRecoverablePredicate *get_is_recoverable_predicate() = 0;
 
    void temp_colls(list<coll_t> *out) {
      if (temp_created)
index 7f37fd285ddf2815c1cc8effb6be9af326cb1898..ef2153e292d192c418a1910d2a56fe3af09a0fab 100644 (file)
@@ -70,6 +70,16 @@ public:
   void clear_state();
   void on_flushed();
 
+  class RPCRecPred : public IsRecoverablePredicate {
+  public:
+    bool operator()(const set<pg_shard_t> &have) const {
+      return have.size() >= 1;
+    }
+  };
+  IsRecoverablePredicate *get_is_recoverable_predicate() {
+    return new RPCRecPred;
+  }
+
   virtual void dump_recovery_info(Formatter *f) const {
     {
       f->open_array_section("pull_from_peer");