git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
PG: allow PGBackend to set criteria for PG up-ness
author Samuel Just <sam.just@inktank.com>
Wed, 12 Feb 2014 18:53:13 +0000 (10:53 -0800)
committer Samuel Just <sam.just@inktank.com>
Tue, 18 Feb 2014 04:12:14 +0000 (20:12 -0800)
ECBackend needs to be able to require that a readable
set of the most recent interval to write be available
in order to ensure that it rolls back the log far
enough.

Signed-off-by: Samuel Just <sam.just@inktank.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/PGBackend.h
src/osd/ReplicatedBackend.h

index f2d09f069051483a1d32946ad4bd2c6e7e78cef5..398c1f5574d1684e2b18bc2cac165fb475b40100 100644 (file)
@@ -749,6 +749,7 @@ void PG::build_prior(std::auto_ptr<PriorSet> &prior_set)
   prior_set.reset(
     new PriorSet(
       pool.info.ec_pool(),
+      get_pgbackend()->get_is_recoverable_predicate(),
       *get_osdmap(),
       past_intervals,
       up,
@@ -1245,13 +1246,16 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id)
     return false;
   }
 
-  /* Check whether we have enough acting shards to perform reads */
-  boost::scoped_ptr<PGBackend::IsReadablePredicate> readable_dec(
-    get_pgbackend()->get_is_readable_predicate());
+  /* Check whether we have enough acting shards to later perform recovery */
+  boost::scoped_ptr<PGBackend::IsRecoverablePredicate> recoverable_predicate(
+    get_pgbackend()->get_is_recoverable_predicate());
   set<pg_shard_t> have;
   for (int i = 0; i < (int)want.size(); ++i)
-    have.insert(pg_shard_t(acting[i], i));
-  if (!(*readable_dec)(have)) {
+    have.insert(
+      pg_shard_t(
+       want[i],
+       pool.info.ec_pool() ? i : ghobject_t::NO_SHARD));
+  if (!(*recoverable_predicate)(have)) {
     want_acting.clear();
     return false;
   }
@@ -7126,13 +7130,14 @@ void PG::RecoveryState::RecoveryMachine::log_exit(const char *state_name, utime_
 #define dout_prefix (*_dout << (debug_pg ? debug_pg->gen_prefix() : string()) << " PriorSet: ")
 
 PG::PriorSet::PriorSet(bool ec_pool,
+                      PGBackend::IsRecoverablePredicate *c,
                       const OSDMap &osdmap,
                       const map<epoch_t, pg_interval_t> &past_intervals,
                       const vector<int> &up,
                       const vector<int> &acting,
                       const pg_info_t &info,
                       const PG *debug_pg)
-  : ec_pool(ec_pool), pg_down(false)
+  : ec_pool(ec_pool), pg_down(false), pcontdec(c)
 {
   /*
    * We have to be careful to gracefully deal with situations like
@@ -7210,7 +7215,7 @@ PG::PriorSet::PriorSet(bool ec_pool,
     // look at candidate osds during this interval.  each falls into
     // one of three categories: up, down (but potentially
     // interesting), or lost (down, but we won't wait for it).
-    bool any_up_now = false;    // any candidates up now
+    set<pg_shard_t> up_now;
     bool any_down_now = false;  // any candidates down now (that might have useful data)
 
     // consider ACTING osds
@@ -7227,7 +7232,7 @@ PG::PriorSet::PriorSet(bool ec_pool,
       if (osdmap.is_up(o)) {
        // include past acting osds if they are up.
        probe.insert(so);
-       any_up_now = true;
+       up_now.insert(so);
       } else if (!pinfo) {
        dout(10) << "build_prior  prior osd." << o << " no longer exists" << dendl;
        down.insert(o);
@@ -7241,12 +7246,13 @@ PG::PriorSet::PriorSet(bool ec_pool,
       }
     }
 
-    // if nobody survived this interval, and we may have gone rw,
+    // if not enough osds survived this interval, and we may have gone rw,
     // then we need to wait for one of those osds to recover to
     // ensure that we haven't lost any information.
-    if (!any_up_now && any_down_now) {
+    if (!(*pcontdec)(up_now) && any_down_now) {
       // fixme: how do we identify a "clean" shutdown anyway?
-      dout(10) << "build_prior  possibly went active+rw, none up; including down osds" << dendl;
+      dout(10) << "build_prior  possibly went active+rw, insufficient up;"
+              << " including down osds" << dendl;
       for (vector<int>::const_iterator i = interval.acting.begin();
           i != interval.acting.end();
           ++i) {
index 6746e010fa67bd4184257cffca71fd7c215e9e00..769f55f33794b6c2963058f735056fa79d34ae94 100644 (file)
@@ -350,7 +350,9 @@ public:
     map<int, epoch_t> blocked_by;  /// current lost_at values for any OSDs in cur set for which (re)marking them lost would affect cur set
 
     bool pg_down;   /// some down osds are included in @a cur; the DOWN pg state bit should be set.
+    boost::scoped_ptr<PGBackend::IsRecoverablePredicate> pcontdec;
     PriorSet(bool ec_pool,
+            PGBackend::IsRecoverablePredicate *c,
             const OSDMap &osdmap,
             const map<epoch_t, pg_interval_t> &past_intervals,
             const vector<int> &up,
index 5332530fe9bcdd35bc432855fa7116d901f2ea44..eb515217ddf292e18d978575a9d69db02cbc1036 100644 (file)
 
    virtual void on_flushed() = 0;
 
-
+   class IsRecoverablePredicate {
+   public:
+     /**
+      * have encodes the shards available
+      */
+     virtual bool operator()(const set<pg_shard_t> &have) const = 0;
+     virtual ~IsRecoverablePredicate() {}
+   };
+   virtual IsRecoverablePredicate *get_is_recoverable_predicate() = 0;
 
    void temp_colls(list<coll_t> *out) {
      if (temp_created)
index 7f37fd285ddf2815c1cc8effb6be9af326cb1898..ef2153e292d192c418a1910d2a56fe3af09a0fab 100644 (file)
@@ -70,6 +70,16 @@ public:
   void clear_state();
   void on_flushed();
 
+  class RPCRecPred : public IsRecoverablePredicate {
+  public:
+    bool operator()(const set<pg_shard_t> &have) const {
+      return have.size() >= 1;
+    }
+  };
+  IsRecoverablePredicate *get_is_recoverable_predicate() {
+    return new RPCRecPred;
+  }
+
   virtual void dump_recovery_info(Formatter *f) const {
     {
       f->open_array_section("pull_from_peer");