osd/: Move osdmap handling into PeeringState
author    sjust@redhat.com <sjust@redhat.com>
          Fri, 22 Mar 2019 23:47:59 +0000 (16:47 -0700)
committer sjust@redhat.com <sjust@redhat.com>
          Wed, 1 May 2019 18:22:12 +0000 (11:22 -0700)
Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/PeeringState.cc
src/osd/PeeringState.h

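This commit carries no message body, but the change is mechanical: the PGPool structure, the cached OSDMapRef, and the last_persisted_osdmap bookkeeping move out of PG into PeeringState. PG now reaches that state through recovery_state (get_pool(), get_osdmap(), advance_map(), activate_map()), while PeeringState reaches back through the PeeringListener interface (prepare_write(), update_store_with_options(), and the on_*() hooks that move from PG onto the listener) for anything that touches the object store. The self-contained sketch below shows the resulting ownership/callback shape; Listener, State and Pg are hypothetical stand-ins for PeeringState::PeeringListener, PeeringState and PG, and the simplified prepare_write(bool) signature is an assumption made for brevity, not the real interface.

// Hypothetical stand-ins: Listener ~ PeeringState::PeeringListener,
// State ~ PeeringState, Pg ~ PG.  Compiles on its own with C++11.
#include <iostream>

using epoch_t = unsigned;

struct Listener {
  // The state machine calls back here for persistence, mirroring
  // PeeringListener::prepare_write() in the real interface.
  virtual void prepare_write(bool need_write_epoch) = 0;
  virtual ~Listener() = default;
};

class State {                        // owns the map/epoch bookkeeping
  epoch_t current_epoch = 0;         // ~ osdmap_ref->get_epoch()
  epoch_t last_persisted = 0;        // ~ last_persisted_osdmap
  Listener *listener;
public:
  explicit State(Listener *l) : listener(l) {}
  void advance_map(epoch_t e) { current_epoch = e; }
  void write_if_dirty() {
    listener->prepare_write(last_persisted < current_epoch);
    last_persisted = current_epoch;  // epoch bookkeeping stays with the owner
  }
};

struct Pg : Listener {               // implements the callback, does the I/O
  void prepare_write(bool need_write_epoch) override {
    std::cout << "writing pg meta, epoch update needed: "
              << std::boolalpha << need_write_epoch << "\n";
  }
};

int main() {
  Pg pg;
  State state(&pg);
  state.advance_map(42);
  state.write_if_dirty();            // writing pg meta, epoch update needed: true
}
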
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index ed052c63db31c12b096939279894886a01bccf39..66e3f26f5fe7594528947a0a794612ce406b9ce9 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -177,91 +177,16 @@ void PG::dump_live_ids()
 }
 #endif
 
-
-void PGPool::update(CephContext *cct, OSDMapRef map)
-{
-  const pg_pool_t *pi = map->get_pg_pool(id);
-  if (!pi) {
-    return; // pool has been deleted
-  }
-  info = *pi;
-  name = map->get_pool_name(id);
-
-  bool updated = false;
-  if ((map->get_epoch() != cached_epoch + 1) ||
-      (pi->get_snap_epoch() == map->get_epoch())) {
-    updated = true;
-  }
-
-  if (map->require_osd_release >= CEPH_RELEASE_MIMIC) {
-    // mimic tracks removed_snaps_queue in the OSDmap and purged_snaps
-    // in the pg_info_t, with deltas for both in each OSDMap.  we don't
-    // need to (and can't) track it here.
-    cached_removed_snaps.clear();
-    newly_removed_snaps.clear();
-  } else {
-    // legacy (<= luminous) removed_snaps tracking
-    if (updated) {
-      if (pi->maybe_updated_removed_snaps(cached_removed_snaps)) {
-       pi->build_removed_snaps(newly_removed_snaps);
-       if (cached_removed_snaps.subset_of(newly_removed_snaps)) {
-          interval_set<snapid_t> removed_snaps = newly_removed_snaps;
-          newly_removed_snaps.subtract(cached_removed_snaps);
-          cached_removed_snaps.swap(removed_snaps);
-       } else {
-          lgeneric_subdout(cct, osd, 0) << __func__
-               << " cached_removed_snaps shrank from " << cached_removed_snaps
-               << " to " << newly_removed_snaps << dendl;
-          cached_removed_snaps.swap(newly_removed_snaps);
-          newly_removed_snaps.clear();
-       }
-      } else {
-       newly_removed_snaps.clear();
-      }
-    } else {
-      /* 1) map->get_epoch() == cached_epoch + 1 &&
-       * 2) pi->get_snap_epoch() != map->get_epoch()
-       *
-       * From the if branch, 1 && 2 must be true.  From 2, we know that
-       * this map didn't change the set of removed snaps.  From 1, we
-       * know that our cached_removed_snaps matches the previous map.
-       * Thus, from 1 && 2, cached_removed snaps matches the current
-       * set of removed snaps and all we have to do is clear
-       * newly_removed_snaps.
-       */
-      newly_removed_snaps.clear();
-    }
-    lgeneric_subdout(cct, osd, 20)
-      << "PGPool::update cached_removed_snaps "
-      << cached_removed_snaps
-      << " newly_removed_snaps "
-      << newly_removed_snaps
-      << " snapc " << snapc
-      << (updated ? " (updated)":" (no change)")
-      << dendl;
-    if (cct->_conf->osd_debug_verify_cached_snaps) {
-      interval_set<snapid_t> actual_removed_snaps;
-      pi->build_removed_snaps(actual_removed_snaps);
-      if (!(actual_removed_snaps == cached_removed_snaps)) {
-       lgeneric_derr(cct) << __func__
-                  << ": mismatch between the actual removed snaps "
-                  << actual_removed_snaps
-                  << " and pool.cached_removed_snaps "
-                  << " pool.cached_removed_snaps " << cached_removed_snaps
-                  << dendl;
-      }
-      ceph_assert(actual_removed_snaps == cached_removed_snaps);
-    }
-  }
-  if (info.is_pool_snaps_mode() && updated) {
-    snapc = pi->get_snap_context();
-  }
-  cached_epoch = map->get_epoch();
-}
-
 PG::PG(OSDService *o, OSDMapRef curmap,
        const PGPool &_pool, spg_t p) :
-  recovery_state(cct, p, this, this, this),
+  recovery_state(
+    cct,
+    p,
+    _pool,
+    curmap,
+    this,
+    this,
+    this),
   role(recovery_state.role),
   state(recovery_state.state),
   primary(recovery_state.primary),
@@ -311,8 +236,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
   coll(p),
   osd(o),
   cct(o->cct),
-  osdmap_ref(curmap),
-  pool(_pool),
+  pool(recovery_state.get_pool()),
   osdriver(osd->store, coll_t(), OSD::make_snapmapper_oid()),
   snap_mapper(
     cct,
@@ -321,7 +245,6 @@ PG::PG(OSDService *o, OSDMapRef curmap,
     p.get_split_bits(_pool.info.get_pg_num()),
     _pool.id,
     p.shard),
-  last_persisted_osdmap(curmap->get_epoch()),
   trace_endpoint("0.0.0.0", 0, "PG"),
   info_struct_v(0),
   pgmeta_oid(p.make_pgmeta_oid()),
@@ -370,7 +293,7 @@ void PG::lock(bool no_lockdep) const
 
 std::ostream& PG::gen_prefix(std::ostream& out) const
 {
-  OSDMapRef mapref = osdmap_ref;
+  OSDMapRef mapref = recovery_state.get_osdmap();
   if (_lock.is_locked_by_me()) {
     out << "osd." << osd->whoami
        << " pg_epoch: " << (mapref ? mapref->get_epoch():0)
@@ -378,7 +301,7 @@ std::ostream& PG::gen_prefix(std::ostream& out) const
   } else {
     out << "osd." << osd->whoami
        << " pg_epoch: " << (mapref ? mapref->get_epoch():0)
-       << " pg[" << info.pgid << "(unlocked)] ";
+       << " pg[" << pg_id.pgid << "(unlocked)] ";
   }
   return out;
 }
@@ -2628,9 +2551,9 @@ void PG::finish_recovery_op(const hobject_t& soid, bool dequeue)
 void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits)
 {
   child->update_snap_mapper_bits(split_bits);
-  child->update_osdmap_ref(get_osdmap());
+  child->recovery_state.update_osdmap_ref(get_osdmap());
 
-  child->pool = pool;
+  child->recovery_state.pool = pool;
 
   // Log
   pg_log.split_into(child_pgid, split_bits, &(child->pg_log));
@@ -3792,12 +3715,36 @@ void PG::_init(ObjectStore::Transaction& t, spg_t pgid, const pg_pool_t *pool)
   t.omap_setkeys(coll, pgmeta_oid, values);
 }
 
-void PG::prepare_write_info(map<string,bufferlist> *km)
+void PG::prepare_write(
+  pg_info_t &info,
+  PGLog &pglog,
+  bool dirty_info,
+  bool dirty_big_info,
+  bool need_write_epoch,
+  ObjectStore::Transaction &t)
 {
   info.stats.stats.add(unstable_stats);
   unstable_stats.clear();
+  map<string,bufferlist> km;
+  if (dirty_big_info || dirty_info) {
+    prepare_write_info(
+      dirty_info,
+      dirty_big_info,
+      need_write_epoch,
+      &km);
+  }
+  pg_log.write_log_and_missing(
+    t, &km, coll, pgmeta_oid, pool.info.require_rollback());
+  if (!km.empty())
+    t.omap_setkeys(coll, pgmeta_oid, km);
+}
 
-  bool need_update_epoch = last_epoch < get_osdmap_epoch();
+void PG::prepare_write_info(
+  bool dirty_info,
+  bool dirty_big_info,
+  bool need_update_epoch,
+  map<string,bufferlist> *km)
+{
   int ret = _prepare_write_info(cct, km, get_osdmap_epoch(),
                                info,
                                last_written_info,
@@ -3806,9 +3753,6 @@ void PG::prepare_write_info(map<string,bufferlist> *km)
                                cct->_conf->osd_fast_info,
                                osd->logger);
   ceph_assert(ret == 0);
-  if (need_update_epoch)
-    last_epoch = get_osdmap_epoch();
-  last_persisted_osdmap = last_epoch;
 
   dirty_info = false;
   dirty_big_info = false;
@@ -3881,16 +3825,6 @@ int PG::peek_map_epoch(ObjectStore *store,
 #pragma GCC diagnostic pop
 #pragma GCC diagnostic warning "-Wpragmas"
 
-void PG::write_if_dirty(ObjectStore::Transaction& t)
-{
-  map<string,bufferlist> km;
-  if (dirty_big_info || dirty_info)
-    prepare_write_info(&km);
-  pg_log.write_log_and_missing(t, &km, coll, pgmeta_oid, pool.info.require_rollback());
-  if (!km.empty())
-    t.omap_setkeys(coll, pgmeta_oid, km);
-}
-
 void PG::add_log_entry(const pg_log_entry_t& e, bool applied)
 {
   // raise last_complete only if we were previously up to date
@@ -6845,48 +6779,26 @@ void PG::handle_advance_map(
   vector<int>& newacting, int acting_primary,
   PeeringCtx *rctx)
 {
-  ceph_assert(lastmap->get_epoch() == osdmap_ref->get_epoch());
-  ceph_assert(lastmap == osdmap_ref);
-  dout(10) << "handle_advance_map "
-          << newup << "/" << newacting
-          << " -- " << up_primary << "/" << acting_primary
-          << dendl;
-  update_osdmap_ref(osdmap);
+  dout(10) << __func__ << ": " << osdmap->get_epoch() << dendl;
   osd_shard->update_pg_epoch(pg_slot, osdmap->get_epoch());
-
-  pool.update(cct, osdmap);
-
-  PeeringState::AdvMap evt(
-    osdmap, lastmap, newup, up_primary,
-    newacting, acting_primary);
-  recovery_state.handle_event(evt, rctx);
-  if (pool.info.last_change == osdmap_ref->get_epoch()) {
-    on_pool_change();
-    update_store_with_options();
-  }
-  last_require_osd_release = osdmap->require_osd_release;
+  recovery_state.advance_map(
+    osdmap,
+    lastmap,
+    newup,
+    up_primary,
+    newacting,
+    acting_primary,
+    rctx);
 }
 
 void PG::handle_activate_map(PeeringCtx *rctx)
 {
-  dout(10) << "handle_activate_map " << dendl;
-  PeeringState::ActMap evt;
-  recovery_state.handle_event(evt, rctx);
-  if (osdmap_ref->get_epoch() - last_persisted_osdmap >
-    cct->_conf->osd_pg_epoch_persisted_max_stale) {
-    dout(20) << __func__ << ": Dirtying info: last_persisted is "
-            << last_persisted_osdmap
-            << " while current is " << osdmap_ref->get_epoch() << dendl;
-    dirty_info = true;
-  } else {
-    dout(20) << __func__ << ": Not dirtying info: last_persisted is "
-            << last_persisted_osdmap
-            << " while current is " << osdmap_ref->get_epoch() << dendl;
-  }
-  if (osdmap_ref->check_new_blacklist_entries()) {
+  dout(10) << __func__ << ": " << get_osdmap()->get_epoch()
+          << dendl;
+  recovery_state.activate_map(rctx);
+  if (get_osdmap()->check_new_blacklist_entries()) {
     check_blacklisted_watchers();
   }
-  write_if_dirty(*rctx->transaction);
 }
 
 void PG::handle_initialize(PeeringCtx *rctx)
@@ -6903,9 +6815,9 @@ void PG::handle_query_state(Formatter *f)
   recovery_state.handle_event(q, 0);
 }
 
-void PG::update_store_with_options()
+void PG::update_store_with_options(const pool_opts_t &opts)
 {
-  auto r = osd->store->set_collection_opts(ch, pool.info.opts);
+  auto r = osd->store->set_collection_opts(ch, opts);
   if(r < 0 && r != -EOPNOTSUPP) {
     derr << __func__ << " set_collection_opts returns error:" << r << dendl;
   }
@@ -7009,7 +6921,7 @@ void PG::_delete_some(ObjectStore::Transaction *t)
              info.pgid,
              info.pgid.get_split_bits(pool.info.get_pg_num()));
       _init(*t, info.pgid, &pool.info);
-      last_epoch = 0;  // to ensure pg epoch is also written
+      recovery_state.reset_last_persisted();
       dirty_info = true;
       dirty_big_info = true;
     } else {
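
With the PG.cc hunks above in place, the persistence path now starts in PeeringState rather than in PG. Roughly, paraphrasing this diff (including the PeeringState::write_if_dirty() definition that appears further down this page; call-flow sketch, not verbatim code):

//   PeeringState::write_if_dirty(t)
//     -> pl->prepare_write(info, pg_log, dirty_info, dirty_big_info,
//                          last_persisted_osdmap < get_osdmap_epoch(), t)   // pl is the PG
//          -> PG::prepare_write_info(...)                // only if dirty_info/dirty_big_info
//          -> pg_log.write_log_and_missing(...)
//          -> t.omap_setkeys(coll, pgmeta_oid, km)
//     -> last_persisted_osdmap = get_osdmap_epoch()      // bookkeeping moved out of prepare_write_info()
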
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 6d744c5d001f2389f0242b960376ef90a67404ee..45d77170abcae87f23765e420e5ac59e85482df2 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -163,35 +163,6 @@ class PGRecoveryStats {
   }
 };
 
-struct PGPool {
-  CephContext* cct;
-  epoch_t cached_epoch;
-  int64_t id;
-  string name;
-
-  pg_pool_t info;      
-  SnapContext snapc;   // the default pool snapc, ready to go.
-
-  // these two sets are for < mimic only
-  interval_set<snapid_t> cached_removed_snaps;      // current removed_snaps set
-  interval_set<snapid_t> newly_removed_snaps;  // newly removed in the last epoch
-
-  PGPool(CephContext* cct, OSDMapRef map, int64_t i, const pg_pool_t& info,
-        const string& name)
-    : cct(cct),
-      cached_epoch(map->get_epoch()),
-      id(i),
-      name(name),
-      info(info) {
-    snapc = info.get_snap_context();
-    if (map->require_osd_release < CEPH_RELEASE_MIMIC) {
-      info.build_removed_snaps(cached_removed_snaps);
-    }
-  }
-
-  void update(CephContext *cct, OSDMapRef map);
-};
-
 /** PG - Replica Placement Group
  *
  */
@@ -273,11 +244,11 @@ public:
 
   const OSDMapRef& get_osdmap() const {
     ceph_assert(is_locked());
-    ceph_assert(osdmap_ref);
-    return osdmap_ref;
+    return recovery_state.get_osdmap();
   }
+
   epoch_t get_osdmap_epoch() const override {
-    return osdmap_ref->get_epoch();
+    return recovery_state.get_osdmap()->get_epoch();
   }
 
   void lock_suspend_timeout(ThreadPool::TPHandle &handle) {
@@ -539,10 +510,7 @@ public:
 protected:
   CephContext *cct;
 
-  // osdmap
-  OSDMapRef osdmap_ref;
-
-  PGPool pool;
+  const PGPool &pool;
 
   // locking and reference counting.
   // I destroy myself when the reference count hits zero.
@@ -590,15 +558,8 @@ protected:
     return get_pgbackend()->get_is_recoverable_predicate();
   }
 protected:
-  epoch_t last_persisted_osdmap;
-
   void requeue_map_waiters();
 
-  void update_osdmap_ref(OSDMapRef newmap) {
-    ceph_assert(_lock.is_locked_by_me());
-    osdmap_ref = std::move(newmap);
-  }
-
 protected:
 
   ZTracer::Endpoint trace_endpoint;
@@ -1811,8 +1772,6 @@ protected:
 public:
   int pg_stat_adjust(osd_stat_t *new_stat);
 protected:
-  epoch_t last_epoch;
-
   bool delete_needs_sleep = false;
 
 protected:
@@ -1919,12 +1878,22 @@ public:
   static void _init(ObjectStore::Transaction& t,
                    spg_t pgid, const pg_pool_t *pool);
 
-protected:
-  void prepare_write_info(map<string,bufferlist> *km);
+  virtual void prepare_write(
+    pg_info_t &info,
+    PGLog &pglog,
+    bool dirty_info,
+    bool dirty_big_info,
+    bool need_write_epoch,
+    ObjectStore::Transaction &t) override;
 
-  void update_store_with_options();
+  void prepare_write_info(
+    bool dirty_info,
+    bool dirty_big_info,
+    bool need_update_epoch,
+    map<string,bufferlist> *km);
+
+  void update_store_with_options(const pool_opts_t &opts) override;
 
-public:
   static int _prepare_write_info(
     CephContext* cct,
     map<string,bufferlist> *km,
@@ -1941,7 +1910,9 @@ public:
     write_if_dirty(*rctx->transaction);
   }
 protected:
-  void write_if_dirty(ObjectStore::Transaction& t);
+  void write_if_dirty(ObjectStore::Transaction& t) {
+    recovery_state.write_if_dirty(t);
+  }
 
   PGLog::IndexedLog projected_log;
   bool check_in_progress_op(
@@ -2063,13 +2034,6 @@ protected:
   // abstract bits
   friend class FlushState;
 
-  virtual void on_role_change() = 0;
-  virtual void on_pool_change() = 0;
-  virtual void on_change(ObjectStore::Transaction *t) = 0;
-  virtual void on_activate() = 0;
-  virtual void on_flushed() = 0;
-  virtual void check_blacklisted_watchers() = 0;
-
   friend ostream& operator<<(ostream& out, const PG& pg);
 };
 
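One detail worth calling out in the PG.h hunk above: pool changes from a PGPool value to a const PGPool & that the constructor binds to recovery_state.get_pool() (see the PG.cc constructor hunk near the top of this page). That is only well-defined because members are constructed in declaration order, so the PeeringState member has to be declared before the reference it feeds, which the constructor's initializer order already reflects. A minimal, self-contained illustration of the constraint; Owner, Wrapper and Pool are hypothetical names standing in for PeeringState, PG and PGPool:

#include <iostream>
#include <string>
#include <utility>

struct Pool { std::string name; };

class Owner {                         // ~ PeeringState: owns the pool state
  Pool pool_;
public:
  explicit Owner(std::string n) : pool_{std::move(n)} {}
  const Pool &get_pool() const { return pool_; }
};

class Wrapper {                       // ~ PG: keeps only a reference
  Owner owner_;                       // must be declared (and so constructed) first
  const Pool &pool_;                  // bound to state owned by owner_
public:
  explicit Wrapper(std::string n)
    : owner_(std::move(n)), pool_(owner_.get_pool()) {}
  const std::string &pool_name() const { return pool_.name; }
};

int main() {
  Wrapper w("rbd");
  std::cout << w.pool_name() << "\n"; // rbd
}
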
diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc
index 640db3fa70c272769784a4c621aea9847fe1d663..3eb88eeba820eda273e944553e5b799fea4701ab 100644
--- a/src/osd/PeeringState.cc
+++ b/src/osd/PeeringState.cc
 #include "messages/MRecoveryReserve.h"
 #include "messages/MOSDScrubReserve.h"
 
+#define dout_context cct
+#define dout_subsys ceph_subsys_osd
+
+void PGPool::update(CephContext *cct, OSDMapRef map)
+{
+  const pg_pool_t *pi = map->get_pg_pool(id);
+  if (!pi) {
+    return; // pool has been deleted
+  }
+  info = *pi;
+  name = map->get_pool_name(id);
+
+  bool updated = false;
+  if ((map->get_epoch() != cached_epoch + 1) ||
+      (pi->get_snap_epoch() == map->get_epoch())) {
+    updated = true;
+  }
+
+  if (map->require_osd_release >= CEPH_RELEASE_MIMIC) {
+    // mimic tracks removed_snaps_queue in the OSDmap and purged_snaps
+    // in the pg_info_t, with deltas for both in each OSDMap.  we don't
+    // need to (and can't) track it here.
+    cached_removed_snaps.clear();
+    newly_removed_snaps.clear();
+  } else {
+    // legacy (<= luminous) removed_snaps tracking
+    if (updated) {
+      if (pi->maybe_updated_removed_snaps(cached_removed_snaps)) {
+       pi->build_removed_snaps(newly_removed_snaps);
+       if (cached_removed_snaps.subset_of(newly_removed_snaps)) {
+          interval_set<snapid_t> removed_snaps = newly_removed_snaps;
+          newly_removed_snaps.subtract(cached_removed_snaps);
+          cached_removed_snaps.swap(removed_snaps);
+       } else {
+          lgeneric_subdout(cct, osd, 0) << __func__
+               << " cached_removed_snaps shrank from " << cached_removed_snaps
+               << " to " << newly_removed_snaps << dendl;
+          cached_removed_snaps.swap(newly_removed_snaps);
+          newly_removed_snaps.clear();
+       }
+      } else {
+       newly_removed_snaps.clear();
+      }
+    } else {
+      /* 1) map->get_epoch() == cached_epoch + 1 &&
+       * 2) pi->get_snap_epoch() != map->get_epoch()
+       *
+       * From the if branch, 1 && 2 must be true.  From 2, we know that
+       * this map didn't change the set of removed snaps.  From 1, we
+       * know that our cached_removed_snaps matches the previous map.
+       * Thus, from 1 && 2, cached_removed snaps matches the current
+       * set of removed snaps and all we have to do is clear
+       * newly_removed_snaps.
+       */
+      newly_removed_snaps.clear();
+    }
+    lgeneric_subdout(cct, osd, 20)
+      << "PGPool::update cached_removed_snaps "
+      << cached_removed_snaps
+      << " newly_removed_snaps "
+      << newly_removed_snaps
+      << " snapc " << snapc
+      << (updated ? " (updated)":" (no change)")
+      << dendl;
+    if (cct->_conf->osd_debug_verify_cached_snaps) {
+      interval_set<snapid_t> actual_removed_snaps;
+      pi->build_removed_snaps(actual_removed_snaps);
+      if (!(actual_removed_snaps == cached_removed_snaps)) {
+       lgeneric_derr(cct) << __func__
+                  << ": mismatch between the actual removed snaps "
+                  << actual_removed_snaps
+                  << " and pool.cached_removed_snaps "
+                  << " pool.cached_removed_snaps " << cached_removed_snaps
+                  << dendl;
+      }
+      ceph_assert(actual_removed_snaps == cached_removed_snaps);
+    }
+  }
+  if (info.is_pool_snaps_mode() && updated) {
+    snapc = pi->get_snap_context();
+  }
+  cached_epoch = map->get_epoch();
+}
+
+void PeeringState::PeeringMachine::send_query(
+  pg_shard_t to, const pg_query_t &query) {
+  ceph_assert(state->rctx);
+  ceph_assert(state->rctx->query_map);
+  (*state->rctx->query_map)[to.osd][
+    spg_t(context< PeeringMachine >().spgid.pgid, to.shard)] = query;
+}
+
+/*-------------Peering State Helpers----------------*/
+#undef dout_prefix
+#define dout_prefix (dpp->gen_prefix(*_dout))
+#undef psdout
+#define psdout(x) ldout(cct, x)
+
 PeeringState::PeeringState(
   CephContext *cct,
   spg_t spgid,
+  const PGPool &_pool,
+  OSDMapRef curmap,
   DoutPrefixProvider *dpp,
   PeeringListener *pl,
   PG *pg)
@@ -24,27 +124,77 @@ PeeringState::PeeringState(
     pl(pl),
     pg(pg),
     orig_ctx(0),
+    osdmap_ref(curmap),
+    pool(_pool),
     info(spgid),
     pg_log(cct) {
   machine.initiate();
 }
 
-
-void PeeringState::PeeringMachine::send_query(
-  pg_shard_t to, const pg_query_t &query) {
-  ceph_assert(state->rctx);
-  ceph_assert(state->rctx->query_map);
-  (*state->rctx->query_map)[to.osd][
-    spg_t(context< PeeringMachine >().spgid.pgid, to.shard)] = query;
+void PeeringState::write_if_dirty(ObjectStore::Transaction& t)
+{
+  pl->prepare_write(
+    info,
+    pg_log,
+    dirty_info,
+    dirty_big_info,
+    last_persisted_osdmap < get_osdmap_epoch(),
+    t);
+  last_persisted_osdmap = get_osdmap_epoch();
+}
+
+void PeeringState::advance_map(
+  OSDMapRef osdmap, OSDMapRef lastmap,
+  vector<int>& newup, int up_primary,
+  vector<int>& newacting, int acting_primary,
+  PeeringCtx *rctx)
+{
+  ceph_assert(lastmap->get_epoch() == osdmap_ref->get_epoch());
+  ceph_assert(lastmap == osdmap_ref);
+  psdout(10) << "handle_advance_map "
+           << newup << "/" << newacting
+           << " -- " << up_primary << "/" << acting_primary
+           << dendl;
+
+  update_osdmap_ref(osdmap);
+  pool.update(cct, osdmap);
+
+  AdvMap evt(
+    osdmap, lastmap, newup, up_primary,
+    newacting, acting_primary);
+  handle_event(evt, rctx);
+  if (pool.info.last_change == osdmap_ref->get_epoch()) {
+    pl->on_pool_change();
+    pl->update_store_with_options(pool.info.opts);
+  }
+  last_require_osd_release = osdmap->require_osd_release;
+}
+
+void PeeringState::activate_map(PeeringCtx *rctx)
+{
+  psdout(10) << __func__ << dendl;
+  ActMap evt;
+  handle_event(evt, rctx);
+  if (osdmap_ref->get_epoch() - last_persisted_osdmap >
+    cct->_conf->osd_pg_epoch_persisted_max_stale) {
+    psdout(20) << __func__ << ": Dirtying info: last_persisted is "
+             << last_persisted_osdmap
+             << " while current is " << osdmap_ref->get_epoch() << dendl;
+    dirty_info = true;
+  } else {
+    psdout(20) << __func__ << ": Not dirtying info: last_persisted is "
+             << last_persisted_osdmap
+             << " while current is " << osdmap_ref->get_epoch() << dendl;
+  }
+  write_if_dirty(*rctx->transaction);
 }
 
 
 /*------------ Peering State Machine----------------*/
-#define dout_context cct
-#define dout_subsys ceph_subsys_osd
 #undef dout_prefix
 #define dout_prefix (context< PeeringMachine >().dpp->gen_prefix(*_dout) \
-                    << "state<" << get_state_name() << ">: ")
+                    << "state<" << get_state_name() << ">: ")
+#undef psdout
 #define psdout(x) ldout(context< PeeringMachine >().cct, x)
 
 /*------Crashed-------*/
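
The activate_map() body above preserves PG::handle_activate_map()'s staleness rule: info is only re-dirtied (forcing the current epoch to be persisted) when the current osdmap epoch has pulled more than osd_pg_epoch_persisted_max_stale epochs ahead of last_persisted_osdmap. A trimmed-down version of that decision follows; should_dirty_info is a hypothetical helper (the real check is written inline), and the threshold 40 is purely illustrative, since the real value comes from the config option.

#include <iostream>

using epoch_t = unsigned;

// ~ the check in PeeringState::activate_map(); max_stale stands in for
// cct->_conf->osd_pg_epoch_persisted_max_stale.
bool should_dirty_info(epoch_t current, epoch_t last_persisted, epoch_t max_stale) {
  return current - last_persisted > max_stale;
}

int main() {
  std::cout << should_dirty_info(1000, 990, 40) << "\n";  // 0: recent enough, skip
  std::cout << should_dirty_info(1000, 900, 40) << "\n";  // 1: too stale, set dirty_info
}
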
diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h
index fe143a29e0cd69c3532916b67f644bc252c9bf51..fe2be32448279f6b1963abbda8e413d452a33f51 100644
--- a/src/osd/PeeringState.h
+++ b/src/osd/PeeringState.h
 #include "PGLog.h"
 #include "PGStateUtils.h"
 #include "PGPeeringEvent.h"
+#include "osd_types.h"
 #include "os/ObjectStore.h"
 #include "OSDMap.h"
 
 class PG;
 
+struct PGPool {
+  CephContext* cct;
+  epoch_t cached_epoch;
+  int64_t id;
+  string name;
+
+  pg_pool_t info;
+  SnapContext snapc;   // the default pool snapc, ready to go.
+
+  // these two sets are for < mimic only
+  interval_set<snapid_t> cached_removed_snaps;      // current removed_snaps set
+  interval_set<snapid_t> newly_removed_snaps;  // newly removed in the last epoch
+
+  PGPool(CephContext* cct, OSDMapRef map, int64_t i, const pg_pool_t& info,
+        const string& name)
+    : cct(cct),
+      cached_epoch(map->get_epoch()),
+      id(i),
+      name(name),
+      info(info) {
+    snapc = info.get_snap_context();
+    if (map->require_osd_release < CEPH_RELEASE_MIMIC) {
+      info.build_removed_snaps(cached_removed_snaps);
+    }
+  }
+
+  void update(CephContext *cct, OSDMapRef map);
+};
+
   /* Encapsulates PG recovery process */
 class PeeringState {
 public:
   struct PeeringListener : public EpochSource {
+    virtual void prepare_write(
+      pg_info_t &info,
+      PGLog &pglog,
+      bool dirty_info,
+      bool dirty_big_info,
+      bool need_write_epoch,
+      ObjectStore::Transaction &t) = 0;
+    virtual void update_store_with_options(const pool_opts_t &opts) = 0;
+
+    virtual void on_pool_change() = 0;
+    virtual void on_role_change() = 0;
+    virtual void on_change(ObjectStore::Transaction *t) = 0;
+    virtual void on_activate() = 0;
+    virtual void on_flushed() = 0;
+    virtual void check_blacklisted_watchers() = 0;
     virtual ~PeeringListener() {}
   };
 
@@ -989,6 +1034,14 @@ public:
   boost::optional<PeeringCtx> rctx;
 
 public:
+  /**
+   * OSDMap state
+   */
+  OSDMapRef osdmap_ref;              ///< Reference to current OSDMap
+  PGPool pool;                       ///< Current pool state
+  epoch_t last_persisted_osdmap = 0; ///< Last osdmap epoch persisted
+
+
   /**
    * Peering state information
    */
@@ -1075,10 +1128,18 @@ public:
   bool deleting = false;  /// true while in removing or OSD is shutting down
   atomic<bool> deleted = {false}; /// true once deletion complete
 
+  void update_osdmap_ref(OSDMapRef newmap) {
+    osdmap_ref = std::move(newmap);
+  }
+
+  void write_if_dirty(ObjectStore::Transaction& t);
+
 public:
   PeeringState(
     CephContext *cct,
     spg_t spgid,
+    const PGPool &pool,
+    OSDMapRef curmap,
     DoutPrefixProvider *dpp,
     PeeringListener *pl,
     PG *pg);
@@ -1109,4 +1170,40 @@ public:
     return last_peering_reset;
   }
 
+  /// Returns stable reference to internal pool structure
+  const PGPool &get_pool() const {
+    return pool;
+  }
+
+  /// Returns reference to current osdmap
+  const OSDMapRef &get_osdmap() const {
+    ceph_assert(osdmap_ref);
+    return osdmap_ref;
+  }
+
+  /// Returns epoch of current osdmap
+  epoch_t get_osdmap_epoch() const {
+    return get_osdmap()->get_epoch();
+  }
+
+  /// Updates peering state with new map
+  void advance_map(
+    OSDMapRef osdmap,       ///< [in] new osdmap
+    OSDMapRef lastmap,      ///< [in] prev osdmap
+    vector<int>& newup,     ///< [in] new up set
+    int up_primary,         ///< [in] new up primary
+    vector<int>& newacting, ///< [in] new acting
+    int acting_primary,     ///< [in] new acting primary
+    PeeringCtx *rctx        ///< [out] recovery context
+    );
+
+  /// Activates most recently updated map
+  void activate_map(
+    PeeringCtx *rctx        ///< [out] recovery context
+    );
+
+  /// resets last_persisted_osdmap
+  void reset_last_persisted() {
+    last_persisted_osdmap = 0;
+  }
 };