git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PG: do not go into replay state
author Sage Weil <sage@redhat.com>
Tue, 20 Dec 2016 20:44:28 +0000 (15:44 -0500)
committer Sage Weil <sage@redhat.com>
Thu, 29 Dec 2016 15:30:07 +0000 (10:30 -0500)
Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc
src/osd/PG.h
src/osd/PrimaryLogPG.cc

index 3927929609acad455677548689d598c0be7c5ce4..b18e9a4b6b1901f618be013ca1da4f0b22605566 100644 (file)
@@ -9331,7 +9331,7 @@ int OSD::init_op_flags(OpRequestRef& op)
         iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) {
       /* This is a bit odd.  PING isn't actually a write.  It can't
        * result in an update to the object_info.  PINGs also aren't
-       * replayed, so there's no reason to write out a log entry
+       * resent, so there's no reason to write out a log entry
        *
        * However, we pipeline them behind writes, so let's force
        * the write_ordered flag.
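
The comment in this hunk carries the reasoning: a WATCH PING is not really a write (it never updates object_info and is never resent), so it needs no log entry, but because pings are pipelined behind writes the op is still forced to be write-ordered. Below is a minimal, self-contained C++ sketch of that idea; the types and names are hypothetical stand-ins, not Ceph's actual OpRequestRef/OSDOp structures.

// Illustrative sketch only: simplified stand-ins for the real op types.
#include <vector>

enum class WatchOp { Watch, Unwatch, Ping };

struct SubOp {
  bool is_watch = false;
  WatchOp watch_op = WatchOp::Watch;
};

struct Request {
  std::vector<SubOp> ops;
  bool write_ordered = false;    // forces queuing behind in-flight writes
  bool needs_log_entry = false;  // a PING never needs one
};

void init_flags(Request &req) {
  for (const SubOp &op : req.ops) {
    if (op.is_watch && op.watch_op == WatchOp::Ping) {
      // Not an actual write (no object_info update, nothing to resend),
      // but pings are pipelined behind writes, so keep them write-ordered.
      req.write_ordered = true;
    }
  }
}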
index 28ecab2a5d6a6a7769f66be81c4ffcb5b9df3bd3..e07e7804a8983b28a9d8bc7afee866bb2db6281b 100644 (file)
@@ -973,7 +973,7 @@ public:
       }
     }
   }
-  // replay / delayed pg activation
+  // delayed pg activation
   void queue_for_recovery(PG *pg, bool front = false) {
     Mutex::Locker l(recovery_lock);
     if (front) {
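
The hunk above only retitles the comment on queue_for_recovery(): with replay gone, delayed PG activation is the comment's remaining subject. As a rough sketch of the pattern visible in the context lines, using generic standard-library types in place of Ceph's Mutex and recovery queue:

// Rough sketch of queue_for_recovery(): push a PG onto a recovery queue
// under a lock, optionally at the front (e.g. for a delayed activation
// that should run ahead of normal recovery work). std::mutex/std::deque
// stand in for Ceph's Mutex and queue types here.
#include <deque>
#include <mutex>

struct PG;  // opaque for the purposes of the sketch

class RecoveryQueue {
  std::mutex recovery_lock;
  std::deque<PG *> awaiting;
public:
  void queue_for_recovery(PG *pg, bool front = false) {
    std::lock_guard<std::mutex> l(recovery_lock);
    if (front)
      awaiting.push_front(pg);
    else
      awaiting.push_back(pg);
  }
};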
index aa40f058d309a574e72f57e56b62d0a7869dfec5..111f8287e6c8279834127c0e54b0924c4d21bad6 100644 (file)
@@ -1551,24 +1551,6 @@ void PG::activate(ObjectStore::Transaction& t,
   assert(scrubber.callbacks.empty());
   assert(callbacks_for_degraded_object.empty());
 
-  // -- crash recovery?
-  if (acting.size() >= pool.info.min_size &&
-      is_primary() &&
-      pool.info.crash_replay_interval > 0 &&
-      may_need_replay(get_osdmap())) {
-    replay_until = ceph_clock_now();
-    replay_until += pool.info.crash_replay_interval;
-    dout(10) << "activate starting replay interval for " << pool.info.crash_replay_interval
-            << " until " << replay_until << dendl;
-    state_set(PG_STATE_REPLAY);
-
-    // TODOSAM: osd->osd-> is no good
-    osd->osd->replay_queue_lock.Lock();
-    osd->osd->replay_queue.push_back(pair<spg_t,utime_t>(
-       info.pgid, replay_until));
-    osd->osd->replay_queue_lock.Unlock();
-  }
-
   // twiddle pg state
   state_clear(PG_STATE_DOWN);
 
@@ -4857,86 +4839,6 @@ void PG::fulfill_log(
   osd->send_message_osd_cluster(mlog, con.get());
 }
 
-
-// true if all OSDs in prior intervals may have crashed, and we need to replay
-// false positives are okay, false negatives are not.
-bool PG::may_need_replay(const OSDMapRef osdmap) const
-{
-  bool crashed = false;
-
-  for (map<epoch_t,pg_interval_t>::const_reverse_iterator p = past_intervals.rbegin();
-       p != past_intervals.rend();
-       ++p) {
-    const pg_interval_t &interval = p->second;
-    dout(10) << "may_need_replay " << interval << dendl;
-
-    if (interval.last < info.history.last_epoch_started)
-      break;  // we don't care
-
-    if (interval.acting.empty())
-      continue;
-
-    if (!interval.maybe_went_rw)
-      continue;
-
-    // look at whether any of the osds during this interval survived
-    // past the end of the interval (i.e., didn't crash and
-    // potentially fail to COMMIT a write that it ACKed).
-    bool any_survived_interval = false;
-
-    // consider ACTING osds
-    for (unsigned i=0; i<interval.acting.size(); i++) {
-      int o = interval.acting[i];
-      if (o == CRUSH_ITEM_NONE)
-       continue;
-
-      const osd_info_t *pinfo = 0;
-      if (osdmap->exists(o))
-       pinfo = &osdmap->get_info(o);
-
-      // does this osd appear to have survived through the end of the
-      // interval?
-      if (pinfo) {
-       if (pinfo->up_from <= interval.first && pinfo->up_thru > interval.last) {
-         dout(10) << "may_need_replay  osd." << o
-                  << " up_from " << pinfo->up_from << " up_thru " << pinfo->up_thru
-                  << " survived the interval" << dendl;
-         any_survived_interval = true;
-       }
-       else if (pinfo->up_from <= interval.first &&
-                (std::find(acting.begin(), acting.end(), o) != acting.end() ||
-                 std::find(up.begin(), up.end(), o) != up.end())) {
-         dout(10) << "may_need_replay  osd." << o
-                  << " up_from " << pinfo->up_from << " and is in acting|up,"
-                  << " assumed to have survived the interval" << dendl;
-         // (if it hasn't, we will rebuild PriorSet)
-         any_survived_interval = true;
-       }
-       else if (pinfo->up_from > interval.last &&
-                pinfo->last_clean_begin <= interval.first &&
-                pinfo->last_clean_end > interval.last) {
-         dout(10) << "may_need_replay  prior osd." << o
-                  << " up_from " << pinfo->up_from
-                  << " and last clean interval ["
-                  << pinfo->last_clean_begin << "," << pinfo->last_clean_end
-                  << ") survived the interval" << dendl;
-         any_survived_interval = true;
-       }
-      }
-    }
-
-    if (!any_survived_interval) {
-      dout(3) << "may_need_replay  no known survivors of interval "
-             << interval.first << "-" << interval.last
-             << ", may need replay" << dendl;
-      crashed = true;
-      break;
-    }
-  }
-
-  return crashed;
-}
-
 void PG::check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap)
 {
   bool changed = false;
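
The function removed above was the basis for entering the replay state: walking past_intervals newest-first, it asked, for each interval that might have gone read/write since last_epoch_started, whether at least one acting OSD is known to have survived past the interval's end (it stayed up through the interval, is still in the current up/acting set, or had a clean-shutdown interval covering it); if some interval had no known survivor, a write could have been ACKed but never committed, so replay was considered necessary. A distilled, self-contained sketch of that check follows, with simplified stand-in types rather than Ceph's pg_interval_t and osd_info_t.

// Distilled sketch of the removed may_need_replay() logic (simplified types).
#include <algorithm>
#include <vector>

struct Interval {
  unsigned first = 0, last = 0;  // epoch range covered by the interval
  bool maybe_went_rw = false;    // could writes have happened during it?
  std::vector<int> acting;       // OSD ids that were acting
};

struct OsdInfo {
  unsigned up_from = 0, up_thru = 0;
  unsigned last_clean_begin = 0, last_clean_end = 0;
};

// lookup(o) returns nullptr if osd.o no longer exists in the map.
using Lookup = const OsdInfo *(*)(int osd);

bool may_need_replay(const std::vector<Interval> &past_intervals,
                     unsigned last_epoch_started,
                     const std::vector<int> &current_up_or_acting,
                     Lookup lookup) {
  // Walk newest-first; intervals older than last_epoch_started don't matter.
  for (auto p = past_intervals.rbegin(); p != past_intervals.rend(); ++p) {
    const Interval &iv = *p;
    if (iv.last < last_epoch_started)
      break;
    if (iv.acting.empty() || !iv.maybe_went_rw)
      continue;

    bool any_survived = false;
    for (int o : iv.acting) {
      const OsdInfo *info = lookup(o);
      if (!info)
        continue;
      bool still_in_set =
          std::find(current_up_or_acting.begin(),
                    current_up_or_acting.end(), o) != current_up_or_acting.end();
      if ((info->up_from <= iv.first && info->up_thru > iv.last) ||  // outlived it
          (info->up_from <= iv.first && still_in_set) ||             // still serving
          (info->up_from > iv.last &&
           info->last_clean_begin <= iv.first &&
           info->last_clean_end > iv.last)) {                        // clean interval covered it
        any_survived = true;
        break;
      }
    }
    if (!any_survived)
      return true;  // no known survivor; an ACKed write may never have committed
  }
  return false;
}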
index 5860e267eeaeb61165e493a72ca0669f50a7f151..461fc18be8b961c12acaf45de82eb6214a88eee6 100644 (file)
@@ -521,8 +521,6 @@ public:
   set<hobject_t, hobject_t::BitwiseComparator> recovering_oids;
 #endif
 
-  utime_t replay_until;
-
 protected:
   int         role;    // 0 = primary, 1 = replica, -1=none.
   unsigned    state;   // PG_STATE_*
@@ -593,8 +591,6 @@ public:
   friend std::ostream& operator<<(std::ostream& oss,
                                  const struct PriorSet &prior);
 
-  bool may_need_replay(const OSDMapRef osdmap) const;
-
 
 public:    
   struct BufferedRecoveryMessages {
@@ -2172,7 +2168,6 @@ public:
   bool       is_activating() const { return state_test(PG_STATE_ACTIVATING); }
   bool       is_peering() const { return state_test(PG_STATE_PEERING); }
   bool       is_down() const { return state_test(PG_STATE_DOWN); }
-  bool       is_replay() const { return state_test(PG_STATE_REPLAY); }
   bool       is_clean() const { return state_test(PG_STATE_CLEAN); }
   bool       is_degraded() const { return state_test(PG_STATE_DEGRADED); }
   bool       is_undersized() const { return state_test(PG_STATE_UNDERSIZED); }
index bf4bada253d067b0e9c429f48372bc9083f96c10..e0ed6192dd1ae1439e478acf3c03e984c61ee511 100644 (file)
@@ -1637,12 +1637,6 @@ void PrimaryLogPG::do_request(
       op->mark_delayed("waiting for active");
       return;
     }
-    if (is_replay()) {
-      dout(20) << " replay, waiting for active on " << op << dendl;
-      waiting_for_active.push_back(op);
-      op->mark_delayed("waiting for replay end");
-      return;
-    }
     // verify client features
     if ((pool.info.has_tiers() || pool.info.is_tier()) &&
        !op->has_feature(CEPH_FEATURE_OSD_CACHEPOOL)) {
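
With the replay branch removed, the only "not ready" gate left at this point in do_request() is the "waiting for active" check seen in the surviving context lines. A small, hypothetical sketch of that park-and-requeue pattern (illustrative names, not PrimaryLogPG's real members):

// Sketch of the surviving delay path: if the PG is not yet active, park the
// op on a wait list and record why it was delayed; requeue on activation.
#include <deque>
#include <memory>
#include <string>
#include <vector>

struct Op {
  std::vector<std::string> delay_reasons;
  void mark_delayed(const std::string &why) { delay_reasons.push_back(why); }
};
using OpRef = std::shared_ptr<Op>;

struct PGLike {
  bool active = false;
  std::deque<OpRef> waiting_for_active;

  // Returns true if the op was parked and the caller should stop processing.
  bool maybe_delay(const OpRef &op) {
    if (!active) {
      waiting_for_active.push_back(op);
      op->mark_delayed("waiting for active");
      return true;
    }
    return false;  // with the replay state gone, active is the only gate here
  }

  // On activation, take everything that was parked and requeue it.
  std::deque<OpRef> take_waiting() { return std::move(waiting_for_active); }
};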