git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd/: move start_peering_interval and callees into PeeringState
author sjust@redhat.com <sjust@redhat.com>
Wed, 27 Mar 2019 21:24:01 +0000 (14:24 -0700)
committer sjust@redhat.com <sjust@redhat.com>
Wed, 1 May 2019 18:22:13 +0000 (11:22 -0700)
Signed-off-by: sjust@redhat.com <sjust@redhat.com>
src/mon/OSDMonitor.cc
src/osd/MissingLoc.h
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h
src/osd/PeeringState.cc
src/osd/PeeringState.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h
src/osd/osd_types.cc
src/osd/osd_types.h

index 1758d28bbe6424a35e68f83fa56e17fba585b274..b0b59296c1f39ff2735cf7f08dc79e4dad10c879 100644 (file)
@@ -914,7 +914,7 @@ OSDMonitor::update_pending_pgs(const OSDMap::Incremental& inc,
              &nextmap,
              &osdmap,
              pgid,
-             &min_size_predicate,
+             min_size_predicate,
              &i.second.past_intervals,
              &debug)) {
          epoch_t e = inc.epoch;
index 19aef8175cb1ebef18fc77f6a19dc3c27d245256..87c0d3c4cea55d40da4dedf9228213b8b9c85ed5 100644 (file)
@@ -120,6 +120,9 @@ class MissingLoc {
     is_readable.reset(_is_readable);
     is_recoverable.reset(_is_recoverable);
   }
+  IsPGRecoverablePredicate &get_recoverable_predicate() {
+    return *is_recoverable;
+  }
   std::ostream& gen_prefix(std::ostream& out) const {
     return dpp->gen_prefix(out);
   }
index 24fc51b7306a2977ec85252176ba1aa9ec10fe69..14e3fea5767f3c479edd1e117f982b1500df7410 100644 (file)
@@ -4742,7 +4742,7 @@ void OSD::build_initial_pg_history(
       osdmap.get(),
       lastmap.get(),
       pgid.pgid,
-      &min_size_predicate,
+      min_size_predicate,
       pi,
       &debug);
     if (new_interval) {
index 5af3b642b74c3ab8b236b52a96863f84f754db7f..45b63bc7935a1882f6e91cfc3a753ce2d9052636 100644 (file)
@@ -725,35 +725,18 @@ PastIntervals::PriorSet PG::build_prior()
 
 void PG::clear_primary_state()
 {
-  dout(10) << "clear_primary_state" << dendl;
-
-  // clear peering state
-  stray_set.clear();
-  peer_log_requested.clear();
-  peer_missing_requested.clear();
-  peer_info.clear();
-  peer_bytes.clear();
-  peer_missing.clear();
   need_up_thru = false;
-  peer_last_complete_ondisk.clear();
-  peer_activated.clear();
-  min_last_complete_ondisk = eversion_t();
-  pg_trim_to = eversion_t();
-  might_have_unfound.clear();
   projected_log = PGLog::IndexedLog();
-
   last_update_ondisk = eversion_t();
 
-  snap_trimq.clear();
-
-  finish_sync_event = 0;  // so that _finish_recovery doesn't go off in another thread
-
   missing_loc.clear();
 
-  release_pg_backoffs();
-
   pg_log.reset_recovery_pointers();
 
+  snap_trimq.clear();
+  finish_sync_event = 0;  // so that _finish_recovery doesn't go off in another thread
+  release_pg_backoffs();
+
   scrubber.reserved_peers.clear();
   scrub_after_recovery = false;
 
@@ -2357,7 +2340,7 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits)
   vector<int> newup, newacting;
   get_osdmap()->pg_to_up_acting_osds(
     child->info.pgid.pgid, &newup, &up_primary, &newacting, &primary);
-  child->init_primary_up_acting(
+  child->recovery_state.init_primary_up_acting(
     newup,
     newacting,
     up_primary,
@@ -2382,7 +2365,7 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits)
   // release all backoffs for simplicity
   release_backoffs(hobject_t(), hobject_t::get_max());
 
-  child->on_new_interval();
+  child->recovery_state.on_new_interval();
 
   child->send_notify = !child->is_primary();
 
@@ -3228,8 +3211,8 @@ void PG::init(
           << " past_intervals " << pi
           << dendl;
 
-  set_role(role);
-  init_primary_up_acting(
+  recovery_state.set_role(role);
+  recovery_state.init_primary_up_acting(
     newup,
     newacting,
     new_up_primary,
@@ -3251,7 +3234,7 @@ void PG::init(
     pg_log.mark_log_for_rewrite();
   }
 
-  on_new_interval();
+  recovery_state.on_new_interval();
 
   dirty_info = true;
   dirty_big_info = true;
@@ -3717,16 +3700,16 @@ void PG::read_state(ObjectStore *store)
     vector<int> acting, up;
     get_osdmap()->pg_to_up_acting_osds(
       pg_id.pgid, &up, &up_primary, &acting, &primary);
-    init_primary_up_acting(
+    recovery_state.init_primary_up_acting(
       up,
       acting,
       up_primary,
       primary);
     int rr = OSDMap::calc_pg_role(osd->whoami, acting);
     if (pool.info.is_replicated() || rr == pg_whoami.shard)
-      set_role(rr);
+      recovery_state.set_role(rr);
     else
-      set_role(-1);
+      recovery_state.set_role(-1);
   }
 
   PG::PeeringCtx rctx(0, 0, 0, new ObjectStore::Transaction);
@@ -4103,6 +4086,34 @@ void PG::unreg_next_scrub()
   }
 }
 
+void PG::clear_ready_to_merge() {
+  osd->clear_ready_to_merge(this);
+}
+
+void PG::queue_want_pg_temp(const vector<int> &wanted) {
+  osd->queue_want_pg_temp(get_pgid().pgid, wanted);
+}
+
+void PG::clear_want_pg_temp() {
+  osd->remove_want_pg_temp(get_pgid().pgid);
+}
+
+void PG::on_role_change() {
+  requeue_ops(waiting_for_peered);
+  plpg_on_role_change();
+}
+
+void PG::on_new_interval() {
+  scrub_queued = false;
+  projected_last_update = eversion_t();
+  cancel_recovery();
+  plpg_on_new_interval();
+}
+
+epoch_t PG::oldest_stored_osdmap() {
+  return osd->get_superblock().oldest_map;
+}
+
 void PG::do_replica_scrub_map(OpRequestRef op)
 {
   const MOSDRepScrubMap *m = static_cast<const MOSDRepScrubMap*>(op->get_req());
@@ -5783,234 +5794,6 @@ bool PG::try_flush_or_schedule_async()
   }
 }
 
-/* Called before initializing peering during advance_map */
-void PG::start_peering_interval(
-  const OSDMapRef lastmap,
-  const vector<int>& newup, int new_up_primary,
-  const vector<int>& newacting, int new_acting_primary,
-  ObjectStore::Transaction *t)
-{
-  const OSDMapRef osdmap = get_osdmap();
-
-  recovery_state.set_last_peering_reset();
-
-  vector<int> oldacting, oldup;
-  int oldrole = get_role();
-
-  unreg_next_scrub();
-
-  if (is_primary()) {
-    osd->clear_ready_to_merge(this);
-  }
-
-  pg_shard_t old_acting_primary = get_primary();
-  pg_shard_t old_up_primary = up_primary;
-  bool was_old_primary = is_primary();
-  bool was_old_replica = is_replica();
-
-  acting.swap(oldacting);
-  up.swap(oldup);
-  init_primary_up_acting(
-    newup,
-    newacting,
-    new_up_primary,
-    new_acting_primary);
-
-  if (info.stats.up != up ||
-      info.stats.acting != acting ||
-      info.stats.up_primary != new_up_primary ||
-      info.stats.acting_primary != new_acting_primary) {
-    info.stats.up = up;
-    info.stats.up_primary = new_up_primary;
-    info.stats.acting = acting;
-    info.stats.acting_primary = new_acting_primary;
-    info.stats.mapping_epoch = osdmap->get_epoch();
-  }
-
-  pg_stats_publish_lock.Lock();
-  pg_stats_publish_valid = false;
-  pg_stats_publish_lock.Unlock();
-
-  // This will now be remapped during a backfill in cases
-  // that it would not have been before.
-  if (up != acting)
-    state_set(PG_STATE_REMAPPED);
-  else
-    state_clear(PG_STATE_REMAPPED);
-
-  int role = osdmap->calc_pg_role(osd->whoami, acting, acting.size());
-  if (pool.info.is_replicated() || role == pg_whoami.shard)
-    set_role(role);
-  else
-    set_role(-1);
-
-  // did acting, up, primary|acker change?
-  if (!lastmap) {
-    dout(10) << " no lastmap" << dendl;
-    dirty_info = true;
-    dirty_big_info = true;
-    info.history.same_interval_since = osdmap->get_epoch();
-  } else {
-    std::stringstream debug;
-    ceph_assert(info.history.same_interval_since != 0);
-    boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(
-      get_is_recoverable_predicate());
-    bool new_interval = PastIntervals::check_new_interval(
-      old_acting_primary.osd,
-      new_acting_primary,
-      oldacting, newacting,
-      old_up_primary.osd,
-      new_up_primary,
-      oldup, newup,
-      info.history.same_interval_since,
-      info.history.last_epoch_clean,
-      osdmap.get(),
-      lastmap.get(),
-      info.pgid.pgid,
-      recoverable.get(),
-      &past_intervals,
-      &debug);
-    dout(10) << __func__ << ": check_new_interval output: "
-            << debug.str() << dendl;
-    if (new_interval) {
-      if (osdmap->get_epoch() == osd->get_superblock().oldest_map &&
-         info.history.last_epoch_clean < osdmap->get_epoch()) {
-       dout(10) << " map gap, clearing past_intervals and faking" << dendl;
-       // our information is incomplete and useless; someone else was clean
-       // after everything we know if osdmaps were trimmed.
-       past_intervals.clear();
-      } else {
-       dout(10) << " noting past " << past_intervals << dendl;
-      }
-      dirty_info = true;
-      dirty_big_info = true;
-      info.history.same_interval_since = osdmap->get_epoch();
-      if (osdmap->have_pg_pool(info.pgid.pgid.pool()) &&
-         info.pgid.pgid.is_split(lastmap->get_pg_num(info.pgid.pgid.pool()),
-                                 osdmap->get_pg_num(info.pgid.pgid.pool()),
-                                 nullptr)) {
-       info.history.last_epoch_split = osdmap->get_epoch();
-      }
-    }
-  }
-
-  if (old_up_primary != up_primary ||
-      oldup != up) {
-    info.history.same_up_since = osdmap->get_epoch();
-  }
-  // this comparison includes primary rank via pg_shard_t
-  if (old_acting_primary != get_primary()) {
-    info.history.same_primary_since = osdmap->get_epoch();
-  }
-
-  on_new_interval();
-
-  dout(1) << __func__ << " up " << oldup << " -> " << up
-          << ", acting " << oldacting << " -> " << acting 
-          << ", acting_primary " << old_acting_primary << " -> " << new_acting_primary
-          << ", up_primary " << old_up_primary << " -> " << new_up_primary
-          << ", role " << oldrole << " -> " << role
-          << ", features acting " << acting_features
-          << " upacting " << upacting_features
-          << dendl;
-
-  // deactivate.
-  state_clear(PG_STATE_ACTIVE);
-  state_clear(PG_STATE_PEERED);
-  state_clear(PG_STATE_PREMERGE);
-  state_clear(PG_STATE_DOWN);
-  state_clear(PG_STATE_RECOVERY_WAIT);
-  state_clear(PG_STATE_RECOVERY_TOOFULL);
-  state_clear(PG_STATE_RECOVERING);
-
-  peer_purged.clear();
-  acting_recovery_backfill.clear();
-  scrub_queued = false;
-
-  // reset primary/replica state?
-  if (was_old_primary || is_primary()) {
-    osd->remove_want_pg_temp(info.pgid.pgid);
-  } else if (was_old_replica || is_replica()) {
-    osd->remove_want_pg_temp(info.pgid.pgid);
-  }
-  clear_primary_state();
-
-    
-  // pg->on_*
-  on_change(t);
-
-  projected_last_update = eversion_t();
-
-  ceph_assert(!deleting);
-
-  // should we tell the primary we are here?
-  send_notify = !is_primary();
-
-  if (role != oldrole ||
-      was_old_primary != is_primary()) {
-    // did primary change?
-    if (was_old_primary != is_primary()) {
-      state_clear(PG_STATE_CLEAN);
-      clear_publish_stats();
-    }
-
-    on_role_change();
-
-    // take active waiters
-    requeue_ops(waiting_for_peered);
-
-  } else {
-    // no role change.
-    // did primary change?
-    if (get_primary() != old_acting_primary) {    
-      dout(10) << *this << " " << oldacting << " -> " << acting 
-              << ", acting primary " 
-              << old_acting_primary << " -> " << get_primary() 
-              << dendl;
-    } else {
-      // primary is the same.
-      if (is_primary()) {
-       // i am (still) primary. but my replica set changed.
-       state_clear(PG_STATE_CLEAN);
-         
-       dout(10) << oldacting << " -> " << acting
-                << ", replicas changed" << dendl;
-      }
-    }
-  }
-  cancel_recovery();
-
-  if (acting.empty() && !up.empty() && up_primary == pg_whoami) {
-    dout(10) << " acting empty, but i am up[0], clearing pg_temp" << dendl;
-    osd->queue_want_pg_temp(info.pgid.pgid, acting);
-  }
-}
-
-void PG::on_new_interval()
-{
-  const OSDMapRef osdmap = get_osdmap();
-
-  reg_next_scrub();
-
-  // initialize features
-  acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
-  upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
-  for (vector<int>::iterator p = acting.begin(); p != acting.end(); ++p) {
-    if (*p == CRUSH_ITEM_NONE)
-      continue;
-    uint64_t f = osdmap->get_xinfo(*p).features;
-    acting_features &= f;
-    upacting_features &= f;
-  }
-  for (vector<int>::iterator p = up.begin(); p != up.end(); ++p) {
-    if (*p == CRUSH_ITEM_NONE)
-      continue;
-    upacting_features &= osdmap->get_xinfo(*p).features;
-  }
-
-  _on_new_interval();
-}
-
 void PG::proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &oinfo)
 {
   ceph_assert(!is_primary());
@@ -6404,9 +6187,6 @@ void PG::handle_activate_map(PeeringCtx *rctx)
   dout(10) << __func__ << ": " << get_osdmap()->get_epoch()
           << dendl;
   recovery_state.activate_map(rctx);
-  if (get_osdmap()->check_new_blacklist_entries()) {
-    check_blacklisted_watchers();
-  }
 }
 
 void PG::handle_initialize(PeeringCtx *rctx)
index 167471c98e8b9dc6de47652ce4f139ff7c5c0352..f16f180f2230a056a1be97dde49d9ed0e98e1cf4 100644 (file)
@@ -406,6 +406,22 @@ public:
   void reg_next_scrub() override;
   void unreg_next_scrub() override;
 
+  void clear_ready_to_merge() override;
+
+  void queue_want_pg_temp(const vector<int> &wanted) override;
+  void clear_want_pg_temp() override;
+
+  void on_new_interval() override;
+  virtual void plpg_on_new_interval() = 0;
+
+  void on_role_change() override;
+  virtual void plpg_on_role_change() = 0;
+
+  void clear_publish_stats() override;
+  void clear_primary_state() override;
+
+  epoch_t oldest_stored_osdmap() override;
+
   bool is_forced_recovery_or_backfill() const {
     return get_state() & (PG_STATE_FORCED_RECOVERY | PG_STATE_FORCED_BACKFILL);
   }
@@ -568,12 +584,6 @@ protected:
   virtual PGBackend *get_pgbackend() = 0;
   virtual const PGBackend* get_pgbackend() const = 0;
 
-protected:
-  /*** PG ****/
-  /// get_is_recoverable_predicate: caller owns returned pointer and must delete when done
-  IsPGRecoverablePredicate *get_is_recoverable_predicate() const {
-    return get_pgbackend()->get_is_recoverable_predicate();
-  }
 protected:
   void requeue_map_waiters();
 
@@ -922,9 +932,6 @@ protected:
   void _update_blocked_by();
   friend class TestOpsSocketHook;
   void publish_stats_to_osd();
-  void clear_publish_stats();
-
-  void clear_primary_state();
 
   bool needs_recovery() const;
   bool needs_backfill() const;
@@ -1471,56 +1478,6 @@ protected:
     return (get_osdmap()->test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT));
   }
 
-  void init_primary_up_acting(
-    const vector<int> &newup,
-    const vector<int> &newacting,
-    int new_up_primary,
-    int new_acting_primary) {
-    actingset.clear();
-    acting = newacting;
-    for (uint8_t i = 0; i < acting.size(); ++i) {
-      if (acting[i] != CRUSH_ITEM_NONE)
-       actingset.insert(
-         pg_shard_t(
-           acting[i],
-           pool.info.is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
-    }
-    upset.clear();
-    up = newup;
-    for (uint8_t i = 0; i < up.size(); ++i) {
-      if (up[i] != CRUSH_ITEM_NONE)
-       upset.insert(
-         pg_shard_t(
-           up[i],
-           pool.info.is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
-    }
-    if (!pool.info.is_erasure()) {
-      up_primary = pg_shard_t(new_up_primary, shard_id_t::NO_SHARD);
-      primary = pg_shard_t(new_acting_primary, shard_id_t::NO_SHARD);
-      return;
-    }
-    up_primary = pg_shard_t();
-    primary = pg_shard_t();
-    for (uint8_t i = 0; i < up.size(); ++i) {
-      if (up[i] == new_up_primary) {
-       up_primary = pg_shard_t(up[i], shard_id_t(i));
-       break;
-      }
-    }
-    for (uint8_t i = 0; i < acting.size(); ++i) {
-      if (acting[i] == new_acting_primary) {
-       primary = pg_shard_t(acting[i], shard_id_t(i));
-       break;
-      }
-    }
-    ceph_assert(up_primary.osd == new_up_primary);
-    ceph_assert(primary.osd == new_acting_primary);
-  }
-
-  void set_role(int r) {
-    role = r;
-  }
-
   bool state_test(uint64_t m) const { return recovery_state.state_test(m); }
   void state_set(uint64_t m) { recovery_state.state_set(m); }
   void state_clear(uint64_t m) { recovery_state.state_clear(m); }
@@ -1661,13 +1618,6 @@ protected:
     boost::optional<eversion_t> roll_forward_to);
 
   bool try_flush_or_schedule_async() override;
-  void start_peering_interval(
-    const OSDMapRef lastmap,
-    const vector<int>& newup, int up_primary,
-    const vector<int>& newacting, int acting_primary,
-    ObjectStore::Transaction *t);
-  void on_new_interval();
-  virtual void _on_new_interval() = 0;
   void start_flush_on_transaction(
     ObjectStore::Transaction *t) override;
 
index eab00391a48c08efd9c8c299de3f9df51f96e46b..367ee623961d37f9e154c3b27801a582cb9a453f 100644 (file)
@@ -370,6 +370,10 @@ void PeeringState::activate_map(PeeringCtx *rctx)
              << " while current is " << osdmap_ref->get_epoch() << dendl;
   }
   write_if_dirty(*rctx->transaction);
+
+  if (get_osdmap()->check_new_blacklist_entries()) {
+    pl->check_blacklisted_watchers();
+  }
 }
 
 void PeeringState::set_last_peering_reset()
@@ -421,7 +425,7 @@ void PeeringState::check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap)
   }
 }
 
-bool PG::should_restart_peering(
+bool PeeringState::should_restart_peering(
   int newupprimary,
   int newactingprimary,
   const vector<int>& newup,
@@ -445,13 +449,293 @@ bool PG::should_restart_peering(
             << " newacting " << newacting << dendl;
     return true;
   }
-  if (!lastmap->is_up(osd->whoami) && osdmap->is_up(osd->whoami)) {
-    dout(10) << __func__ << " osd transitioned from down -> up" << dendl;
+  if (!lastmap->is_up(pg_whoami.osd) && osdmap->is_up(pg_whoami.osd)) {
+    psdout(10) << __func__ << " osd transitioned from down -> up"
+              << dendl;
     return true;
   }
   return false;
 }
 
+/* Called before initializing peering during advance_map */
+void PeeringState::start_peering_interval(
+  const OSDMapRef lastmap,
+  const vector<int>& newup, int new_up_primary,
+  const vector<int>& newacting, int new_acting_primary,
+  ObjectStore::Transaction *t)
+{
+  const OSDMapRef osdmap = get_osdmap();
+
+  set_last_peering_reset();
+
+  vector<int> oldacting, oldup;
+  int oldrole = get_role();
+
+  pl->unreg_next_scrub();
+  if (is_primary()) {
+    pl->clear_ready_to_merge();
+  }
+
+
+  pg_shard_t old_acting_primary = get_primary();
+  pg_shard_t old_up_primary = up_primary;
+  bool was_old_primary = is_primary();
+  bool was_old_replica = is_replica();
+
+  acting.swap(oldacting);
+  up.swap(oldup);
+  init_primary_up_acting(
+    newup,
+    newacting,
+    new_up_primary,
+    new_acting_primary);
+
+  if (info.stats.up != up ||
+      info.stats.acting != acting ||
+      info.stats.up_primary != new_up_primary ||
+      info.stats.acting_primary != new_acting_primary) {
+    info.stats.up = up;
+    info.stats.up_primary = new_up_primary;
+    info.stats.acting = acting;
+    info.stats.acting_primary = new_acting_primary;
+    info.stats.mapping_epoch = osdmap->get_epoch();
+  }
+
+  pl->clear_publish_stats();
+
+  // This will now be remapped during a backfill in cases
+  // that it would not have been before.
+  if (up != acting)
+    state_set(PG_STATE_REMAPPED);
+  else
+    state_clear(PG_STATE_REMAPPED);
+
+  int role = osdmap->calc_pg_role(pg_whoami.osd, acting, acting.size());
+  if (pool.info.is_replicated() || role == pg_whoami.shard)
+    set_role(role);
+  else
+    set_role(-1);
+
+  // did acting, up, primary|acker change?
+  if (!lastmap) {
+    psdout(10) << " no lastmap" << dendl;
+    dirty_info = true;
+    dirty_big_info = true;
+    info.history.same_interval_since = osdmap->get_epoch();
+  } else {
+    std::stringstream debug;
+    ceph_assert(info.history.same_interval_since != 0);
+    bool new_interval = PastIntervals::check_new_interval(
+      old_acting_primary.osd,
+      new_acting_primary,
+      oldacting, newacting,
+      old_up_primary.osd,
+      new_up_primary,
+      oldup, newup,
+      info.history.same_interval_since,
+      info.history.last_epoch_clean,
+      osdmap.get(),
+      lastmap.get(),
+      info.pgid.pgid,
+      missing_loc.get_recoverable_predicate(),
+      &past_intervals,
+      &debug);
+    psdout(10) << __func__ << ": check_new_interval output: "
+              << debug.str() << dendl;
+    if (new_interval) {
+      if (osdmap->get_epoch() == pl->oldest_stored_osdmap() &&
+         info.history.last_epoch_clean < osdmap->get_epoch()) {
+       psdout(10) << " map gap, clearing past_intervals and faking" << dendl;
+       // our information is incomplete and useless; someone else was clean
+       // after everything we know if osdmaps were trimmed.
+       past_intervals.clear();
+      } else {
+       psdout(10) << " noting past " << past_intervals << dendl;
+      }
+      dirty_info = true;
+      dirty_big_info = true;
+      info.history.same_interval_since = osdmap->get_epoch();
+      if (osdmap->have_pg_pool(info.pgid.pgid.pool()) &&
+         info.pgid.pgid.is_split(lastmap->get_pg_num(info.pgid.pgid.pool()),
+                                 osdmap->get_pg_num(info.pgid.pgid.pool()),
+                                 nullptr)) {
+       info.history.last_epoch_split = osdmap->get_epoch();
+      }
+    }
+  }
+
+  if (old_up_primary != up_primary ||
+      oldup != up) {
+    info.history.same_up_since = osdmap->get_epoch();
+  }
+  // this comparison includes primary rank via pg_shard_t
+  if (old_acting_primary != get_primary()) {
+    info.history.same_primary_since = osdmap->get_epoch();
+  }
+
+  pl->on_new_interval();
+  pl->reg_next_scrub();
+
+  psdout(1) << __func__ << " up " << oldup << " -> " << up
+           << ", acting " << oldacting << " -> " << acting
+           << ", acting_primary " << old_acting_primary << " -> "
+           << new_acting_primary
+           << ", up_primary " << old_up_primary << " -> " << new_up_primary
+           << ", role " << oldrole << " -> " << role
+           << ", features acting " << acting_features
+           << " upacting " << upacting_features
+           << dendl;
+
+  // deactivate.
+  state_clear(PG_STATE_ACTIVE);
+  state_clear(PG_STATE_PEERED);
+  state_clear(PG_STATE_PREMERGE);
+  state_clear(PG_STATE_DOWN);
+  state_clear(PG_STATE_RECOVERY_WAIT);
+  state_clear(PG_STATE_RECOVERY_TOOFULL);
+  state_clear(PG_STATE_RECOVERING);
+
+  peer_purged.clear();
+  acting_recovery_backfill.clear();
+
+  // reset primary/replica state?
+  if (was_old_primary || is_primary()) {
+    pl->clear_want_pg_temp();
+  } else if (was_old_replica || is_replica()) {
+    pl->clear_want_pg_temp();
+  }
+  clear_primary_state();
+
+  pl->on_change(t);
+
+  ceph_assert(!deleting);
+
+  // should we tell the primary we are here?
+  send_notify = !is_primary();
+
+  if (role != oldrole ||
+      was_old_primary != is_primary()) {
+    // did primary change?
+    if (was_old_primary != is_primary()) {
+      state_clear(PG_STATE_CLEAN);
+    }
+
+    pl->on_role_change();
+  } else {
+    // no role change.
+    // did primary change?
+    if (get_primary() != old_acting_primary) {
+      psdout(10) << oldacting << " -> " << acting
+              << ", acting primary "
+              << old_acting_primary << " -> " << get_primary()
+              << dendl;
+    } else {
+      // primary is the same.
+      if (is_primary()) {
+       // i am (still) primary. but my replica set changed.
+       state_clear(PG_STATE_CLEAN);
+
+       psdout(10) << oldacting << " -> " << acting
+                << ", replicas changed" << dendl;
+      }
+    }
+  }
+
+  if (acting.empty() && !up.empty() && up_primary == pg_whoami) {
+    psdout(10) << " acting empty, but i am up[0], clearing pg_temp" << dendl;
+    pl->queue_want_pg_temp(acting);
+  }
+}
+
+void PeeringState::on_new_interval()
+{
+  const OSDMapRef osdmap = get_osdmap();
+
+  // initialize features
+  acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+  upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+  for (vector<int>::iterator p = acting.begin(); p != acting.end(); ++p) {
+    if (*p == CRUSH_ITEM_NONE)
+      continue;
+    uint64_t f = osdmap->get_xinfo(*p).features;
+    acting_features &= f;
+    upacting_features &= f;
+  }
+  for (vector<int>::iterator p = up.begin(); p != up.end(); ++p) {
+    if (*p == CRUSH_ITEM_NONE)
+      continue;
+    upacting_features &= osdmap->get_xinfo(*p).features;
+  }
+
+  pl->on_new_interval();
+}
+
+void PeeringState::init_primary_up_acting(
+  const vector<int> &newup,
+  const vector<int> &newacting,
+  int new_up_primary,
+  int new_acting_primary) {
+  actingset.clear();
+  acting = newacting;
+  for (uint8_t i = 0; i < acting.size(); ++i) {
+    if (acting[i] != CRUSH_ITEM_NONE)
+      actingset.insert(
+       pg_shard_t(
+         acting[i],
+         pool.info.is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
+  }
+  upset.clear();
+  up = newup;
+  for (uint8_t i = 0; i < up.size(); ++i) {
+    if (up[i] != CRUSH_ITEM_NONE)
+      upset.insert(
+       pg_shard_t(
+         up[i],
+         pool.info.is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
+  }
+  if (!pool.info.is_erasure()) {
+    up_primary = pg_shard_t(new_up_primary, shard_id_t::NO_SHARD);
+    primary = pg_shard_t(new_acting_primary, shard_id_t::NO_SHARD);
+    return;
+  }
+  up_primary = pg_shard_t();
+  primary = pg_shard_t();
+  for (uint8_t i = 0; i < up.size(); ++i) {
+    if (up[i] == new_up_primary) {
+      up_primary = pg_shard_t(up[i], shard_id_t(i));
+      break;
+    }
+  }
+  for (uint8_t i = 0; i < acting.size(); ++i) {
+    if (acting[i] == new_acting_primary) {
+      primary = pg_shard_t(acting[i], shard_id_t(i));
+      break;
+    }
+  }
+  ceph_assert(up_primary.osd == new_up_primary);
+  ceph_assert(primary.osd == new_acting_primary);
+}
+
+void PeeringState::clear_primary_state()
+{
+  psdout(10) << "clear_primary_state" << dendl;
+
+  // clear peering state
+  stray_set.clear();
+  peer_log_requested.clear();
+  peer_missing_requested.clear();
+  peer_info.clear();
+  peer_bytes.clear();
+  peer_missing.clear();
+  peer_last_complete_ondisk.clear();
+  peer_activated.clear();
+  min_last_complete_ondisk = eversion_t();
+  pg_trim_to = eversion_t();
+  might_have_unfound.clear();
+  pl->clear_primary_state();
+}
+
+
 /*------------ Peering State Machine----------------*/
 #undef dout_prefix
 #define dout_prefix (context< PeeringMachine >().dpp->gen_prefix(*_dout) \
@@ -529,7 +813,6 @@ PeeringState::Started::react(const IntervalFlush&)
 boost::statechart::result PeeringState::Started::react(const AdvMap& advmap)
 {
   PeeringState *ps = context< PeeringMachine >().state;
-  PG *pg = context< PeeringMachine >().pg;
   psdout(10) << "Started advmap" << dendl;
   ps->check_full_transition(advmap.lastmap, advmap.osdmap);
   if (ps->should_restart_peering(
@@ -602,7 +885,7 @@ boost::statechart::result PeeringState::Reset::react(const AdvMap& advmap)
        advmap.osdmap)) {
     psdout(10) << "should restart peering, calling start_peering_interval again"
                       << dendl;
-    pg->start_peering_interval(
+    ps->start_peering_interval(
       advmap.lastmap,
       advmap.newup, advmap.up_primary,
       advmap.newacting, advmap.acting_primary,
@@ -778,7 +1061,7 @@ void PeeringState::Primary::exit()
   pg->want_acting.clear();
   utime_t dur = ceph_clock_now() - enter_time;
   pl->get_peering_perf().tinc(rs_primary_latency, dur);
-  pg->clear_primary_state();
+  pl->clear_primary_state();
   pg->state_clear(PG_STATE_CREATING);
 }
 
@@ -1815,6 +2098,7 @@ PeeringState::Active::Active(my_context ctx)
 boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
 {
   PG *pg = context< PeeringMachine >().pg;
+  PeeringState *ps = context< PeeringMachine >().state;
   if (ps->should_restart_peering(
        advmap.up_primary,
        advmap.acting_primary,
index 8d39db3a9a7f6f0d8bbde3a538a3e869a35cfc7f..0dc182ac1d8fd8585666107fefec149d7457dddd 100644 (file)
@@ -79,12 +79,25 @@ public:
 
     virtual PerfCounters &get_peering_perf() = 0;
 
+    virtual void clear_ready_to_merge() = 0;
+
+    virtual void queue_want_pg_temp(const vector<int> &wanted) = 0;
+    virtual void clear_want_pg_temp() = 0;
+
+    virtual void clear_publish_stats() = 0;
+
     virtual void check_recovery_sources(const OSDMapRef& newmap) = 0;
+    virtual void check_blacklisted_watchers() = 0;
+    virtual void clear_primary_state() = 0;
+
     virtual void on_pool_change() = 0;
     virtual void on_role_change() = 0;
     virtual void on_change(ObjectStore::Transaction *t) = 0;
     virtual void on_activate() = 0;
-    virtual void check_blacklisted_watchers() = 0;
+    virtual void on_new_interval() = 0;
+
+    virtual epoch_t oldest_stored_osdmap() = 0;
+
     virtual ~PeeringListener() {}
   };
 
@@ -1168,7 +1181,18 @@ public:
     const vector<int>& newacting,
     OSDMapRef lastmap,
     OSDMapRef osdmap);
-
+  void start_peering_interval(
+    const OSDMapRef lastmap,
+    const vector<int>& newup, int up_primary,
+    const vector<int>& newacting, int acting_primary,
+    ObjectStore::Transaction *t);
+  void on_new_interval();
+  void init_primary_up_acting(
+    const vector<int> &newup,
+    const vector<int> &newacting,
+    int new_up_primary,
+    int new_acting_primary);
+  void clear_primary_state();
 
 public:
   PeeringState(
@@ -1180,6 +1204,12 @@ public:
     PeeringListener *pl,
     PG *pg);
 
+  void set_backend_predicates(
+    IsPGReadablePredicate *is_readable,
+    IsPGRecoverablePredicate *is_recoverable) {
+    missing_loc.set_backend_predicates(is_readable, is_recoverable);
+  }
+
   // MissingLoc::MappingInfo
   const set<pg_shard_t> &get_upset() const override {
     return upset;
@@ -1291,6 +1321,9 @@ public:
     return deleted || e < get_last_peering_reset();
   }
 
+  void set_role(int r) {
+    role = r;
+  }
   int get_role() const {
     return role;
   }
index 9136127b7998d18a8012235c16bcdb29a9e32329..e4307c6e91fb0d53a0acef3d61eddc66a86a24c0 100644 (file)
@@ -1721,7 +1721,7 @@ PrimaryLogPG::PrimaryLogPG(OSDService *o, OSDMapRef curmap,
   temp_seq(0),
   snap_trimmer_machine(this)
 { 
-  missing_loc.set_backend_predicates(
+  recovery_state.set_backend_predicates(
     pgbackend->get_is_readable_predicate(),
     pgbackend->get_is_recoverable_predicate());
   snap_trimmer_machine.initiate();
@@ -12187,9 +12187,10 @@ void PrimaryLogPG::on_activate()
   agent_setup();
 }
 
-void PrimaryLogPG::_on_new_interval()
+void PrimaryLogPG::plpg_on_new_interval()
 {
   dout(20) << __func__ << " checking missing set deletes flag. missing = " << pg_log.get_missing() << dendl;
+
   if (!pg_log.get_missing().may_include_deletes &&
       get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES)) {
     pg_log.rebuild_missing_set_with_deletes(osd->store, ch, info);
@@ -12307,7 +12308,7 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction *t)
   ceph_assert(objects_blocked_on_degraded_snap.empty());
 }
 
-void PrimaryLogPG::on_role_change()
+void PrimaryLogPG::plpg_on_role_change()
 {
   dout(10) << __func__ << dendl;
   if (get_role() != 0 && hit_set) {
index 910a45f8d4b0b42ecf26b8e4c90f1df0dae4fff1..c101df13cc6edbdd9fafd28f27aa7a6b6f40be7c 100644 (file)
@@ -1858,9 +1858,9 @@ public:
   void do_update_log_missing_reply(
     OpRequestRef &op);
 
-  void on_role_change() override;
+  void plpg_on_role_change() override;
   void on_pool_change() override;
-  void _on_new_interval() override;
+  void plpg_on_new_interval() override;
   void clear_async_reads();
   void on_change(ObjectStore::Transaction *t) override;
   void on_activate() override;
index 0e81292ad8cacff6f38cefd6e9fe1fe76895e9b7..5908a3e4824981a6fcc4cbbf0b96f15587cec3ca 100644 (file)
@@ -3850,7 +3850,7 @@ bool PastIntervals::check_new_interval(
   const OSDMap *osdmap,
   const OSDMap *lastmap,
   pg_t pgid,
-  IsPGRecoverablePredicate *could_have_gone_active,
+  IsPGRecoverablePredicate &could_have_gone_active,
   PastIntervals *past_intervals,
   std::ostream *out)
 {
@@ -3935,7 +3935,7 @@ bool PastIntervals::check_new_interval(
     if (num_acting &&
        i.primary != -1 &&
        num_acting >= old_pg_pool.min_size &&
-        (*could_have_gone_active)(old_acting_shards)) {
+        could_have_gone_active(old_acting_shards)) {
       if (out)
        *out << __func__ << " " << i
             << " up_thru " << lastmap->get_up_thru(i.primary)
index c0b9cf65d909b52dedb254ec4fb52d84cf9f8a8b..4fe9c352c1980e17cdb919f31eec0a0e1d040a6a 100644 (file)
@@ -3176,7 +3176,7 @@ public:
     const OSDMap *osdmap,      ///< [in] current map
     const OSDMap *lastmap,     ///< [in] last map
     pg_t pgid,                                  ///< [in] pgid for pg
-    IsPGRecoverablePredicate *could_have_gone_active, ///< [in] predicate whether the pg can be active
+    const IsPGRecoverablePredicate &could_have_gone_active, ///< [in] predicate whether the pg can be active
     PastIntervals *past_intervals,              ///< [out] intervals
     std::ostream *out = 0                            ///< [out] debug ostream
     );
@@ -3194,7 +3194,7 @@ public:
     std::shared_ptr<const OSDMap> osdmap,      ///< [in] current map
     std::shared_ptr<const OSDMap> lastmap,     ///< [in] last map
     pg_t pgid,                                  ///< [in] pgid for pg
-    IsPGRecoverablePredicate *could_have_gone_active, ///< [in] predicate whether the pg can be active
+    IsPGRecoverablePredicate &could_have_gone_active, ///< [in] predicate whether the pg can be active
     PastIntervals *past_intervals,              ///< [out] intervals
     std::ostream *out = 0                            ///< [out] debug ostream
     ) {