PG::PG(OSDService *o, OSDMapRef curmap,
const PGPool &_pool, spg_t p) :
+ recovery_state(cct, p, this, this, this),
+ role(recovery_state.role),
+ state(recovery_state.state),
+ primary(recovery_state.primary),
+ pg_whoami(recovery_state.pg_whoami),
+ up_primary(recovery_state.up_primary),
+ up(recovery_state.up),
+ upset(recovery_state.upset),
+ acting(recovery_state.acting),
+ actingset(recovery_state.actingset),
+ acting_recovery_backfill(recovery_state.acting_recovery_backfill),
+ send_notify(recovery_state.send_notify),
+ dirty_info(recovery_state.dirty_info),
+ dirty_big_info(recovery_state.dirty_big_info),
+ info(recovery_state.info),
+ last_written_info(recovery_state.last_written_info),
+ past_intervals(recovery_state.past_intervals),
+ pg_log(recovery_state.pg_log),
+ last_peering_reset(recovery_state.last_peering_reset),
+ last_update_ondisk(recovery_state.last_update_ondisk),
+ last_complete_ondisk(recovery_state.last_complete_ondisk),
+ last_update_applied(recovery_state.last_update_applied),
+ last_rollback_info_trimmed_to_applied(recovery_state.last_rollback_info_trimmed_to_applied),
+ flushes_in_progress(recovery_state.flushes_in_progress),
+ stray_set(recovery_state.stray_set),
+ peer_info(recovery_state.peer_info),
+ peer_bytes(recovery_state.peer_bytes),
+ peer_purged(recovery_state.peer_purged),
+ peer_missing(recovery_state.peer_missing),
+ peer_log_requested(recovery_state.peer_log_requested),
+ peer_missing_requested(recovery_state.peer_missing_requested),
+ peer_features(recovery_state.peer_features),
+ acting_features(recovery_state.acting_features),
+ upacting_features(recovery_state.upacting_features),
+ last_require_osd_release(recovery_state.last_require_osd_release),
+ want_acting(recovery_state.want_acting),
+ peer_last_complete_ondisk(recovery_state.peer_last_complete_ondisk),
+ min_last_complete_ondisk(recovery_state.min_last_complete_ondisk),
+ pg_trim_to(recovery_state.pg_trim_to),
+ blocked_by(recovery_state.blocked_by),
+ peer_activated(recovery_state.peer_activated),
+ backfill_targets(recovery_state.backfill_targets),
+ async_recovery_targets(recovery_state.async_recovery_targets),
+ might_have_unfound(recovery_state.might_have_unfound),
+ deleting(recovery_state.deleting),
+ deleted(recovery_state.deleted),
pg_id(p),
coll(p),
osd(o),
_pool.id,
p.shard),
last_persisted_osdmap(curmap->get_epoch()),
- deleting(false),
trace_endpoint("0.0.0.0", 0, "PG"),
- dirty_info(false), dirty_big_info(false),
- info(p),
info_struct_v(0),
- pg_log(cct),
pgmeta_oid(p.make_pgmeta_oid()),
missing_loc(this),
stat_queue_item(this),
scrub_queued(false),
recovery_queued(false),
recovery_ops_active(0),
- role(-1),
- state(0),
- send_notify(false),
- pg_whoami(osd->whoami, p.shard),
need_up_thru(false),
- last_peering_reset(0),
heartbeat_peer_lock("PG::heartbeat_peer_lock"),
backfill_reserved(false),
backfill_reserving(false),
- flushes_in_progress(0),
pg_stats_publish_lock("PG::pg_stats_publish_lock"),
pg_stats_publish_valid(false),
finish_sync_event(NULL),
backoff_lock("PG::backoff_lock"),
scrub_after_recovery(false),
- active_pushes(0),
- recovery_state(cct, info.pgid, this, this, this),
- peer_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
- acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
- upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
- last_epoch(0),
- last_require_osd_release(curmap->require_osd_release)
+ active_pushes(0)
{
#ifdef PG_DEBUG_REFS
osd->add_pgid(p, this);
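
C++ initializes members in declaration order, not initializer-list order, so
recovery_state is declared ahead of the reference members it backs (see the
PG.h hunk below) and is fully constructed before any of the references bind
to its fields. A minimal sketch of the pattern, with hypothetical names:

    struct State {
      int role = -1;
      bool dirty_info = false;
    };

    struct Owner {
      State state;        // declared first, so constructed first
      int &role;          // aliases state.role; old call sites keep compiling
      bool &dirty_info;   // aliases state.dirty_info

      Owner() : role(state.role), dirty_info(state.dirty_info) {}
    };

    int main() {
      Owner o;
      o.role = 0;                        // writes through the reference
      return o.state.role == 0 ? 0 : 1;
    }
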
class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
friend class NamedState;
friend class PeeringState;
+
+ PeeringState recovery_state;
public:
using PeeringCtx = PeeringState::PeeringCtx;
+
+protected:
+  /**
+   * Peering state information now owned by PeeringState; the references
+   * below keep existing PG code compiling while the move is completed.
+   */
+ int &role;
+ uint64_t &state;
+ pg_shard_t &primary;
+ pg_shard_t &pg_whoami;
+ pg_shard_t &up_primary;
+ vector<int> &up;
+ set<pg_shard_t> &upset;
+ vector<int> &acting;
+ set<pg_shard_t> &actingset;
+ set<pg_shard_t> &acting_recovery_backfill;
+ bool &send_notify;
+ bool &dirty_info;
+ bool &dirty_big_info;
+ pg_info_t &info;
+ pg_info_t &last_written_info;
+ PastIntervals &past_intervals;
+ PGLog &pg_log;
+ epoch_t &last_peering_reset;
+ eversion_t &last_update_ondisk;
+ eversion_t &last_complete_ondisk;
+ eversion_t &last_update_applied;
+ eversion_t &last_rollback_info_trimmed_to_applied;
+ unsigned &flushes_in_progress;
+ set<pg_shard_t> &stray_set;
+ map<pg_shard_t, pg_info_t> &peer_info;
+ map<pg_shard_t, int64_t> &peer_bytes;
+ set<pg_shard_t> &peer_purged;
+ map<pg_shard_t, pg_missing_t> &peer_missing;
+ set<pg_shard_t> &peer_log_requested;
+ set<pg_shard_t> &peer_missing_requested;
+ uint64_t &peer_features;
+ uint64_t &acting_features;
+ uint64_t &upacting_features;
+ unsigned &last_require_osd_release;
+ vector<int> &want_acting;
+ map<pg_shard_t,eversion_t> &peer_last_complete_ondisk;
+ eversion_t &min_last_complete_ondisk;
+ eversion_t &pg_trim_to;
+ set<int> &blocked_by;
+ set<pg_shard_t> &peer_activated;
+ set<pg_shard_t> &backfill_targets;
+ set<pg_shard_t> &async_recovery_targets;
+ set<pg_shard_t> &might_have_unfound;
+ bool &deleting;
+ atomic<bool> &deleted;
+
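
Binding these as references keeps every existing use of fields like info,
pg_log, or acting in PG compiling unchanged while PeeringState owns the
storage. One side effect: a class with reference members has its copy and
move assignment operators implicitly deleted, since references cannot be
reseated. A small sketch of that constraint, with hypothetical names:

    #include <type_traits>

    struct Backing { int value = 0; };

    struct Facade {
      Backing backing;
      int &value;                        // reference member
      Facade() : value(backing.value) {}
    };

    // The reference member suppresses the implicit assignment operators.
    static_assert(!std::is_copy_assignable<Facade>::value,
                  "references cannot be reseated");

    int main() {
      Facade f;
      f.value = 42;                      // writes through to f.backing.value
      return f.backing.value == 42 ? 0 : 1;
    }
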
+public:
// -- members --
const spg_t pg_id;
const coll_t coll;
protected:
-
- bool deleting; // true while in removing or OSD is shutting down
- atomic<bool> deleted = {false};
-
ZTracer::Endpoint trace_endpoint;
protected:
- bool dirty_info, dirty_big_info;
-
-protected:
- // pg state
- pg_info_t info; ///< current pg info
- pg_info_t last_written_info; ///< last written info
__u8 info_struct_v = 0;
static const __u8 latest_struct_v = 10;
// v10 is the new past_intervals encoding
void upgrade(ObjectStore *store);
protected:
- PGLog pg_log;
ghobject_t pgmeta_oid;
// ------------------
private:
-
loc_count_t _get_count(const set<pg_shard_t>& shards) {
loc_count_t r;
for (auto s : shards) {
}
} missing_loc;
- PastIntervals past_intervals;
-
interval_set<snapid_t> snap_trimq;
/* You should not use these items without taking their respective queue locks
multiset<hobject_t> recovering_oids;
#endif
-protected:
- int role; // 0 = primary, 1 = replica, -1=none.
- uint64_t state; // PG_STATE_*
-
- bool send_notify; ///< true if we are non-primary and should notify the primary
-
-protected:
- eversion_t last_update_ondisk; // last_update that has committed; ONLY DEFINED WHEN is_active()
- eversion_t last_complete_ondisk; // last_complete that has committed.
- eversion_t last_update_applied;
-
- // entries <= last_rollback_info_trimmed_to_applied have been trimmed
- eversion_t last_rollback_info_trimmed_to_applied;
-
- // primary state
-protected:
- pg_shard_t primary;
- pg_shard_t pg_whoami;
- pg_shard_t up_primary;
- vector<int> up, acting, want_acting;
- // acting_recovery_backfill contains shards that are acting,
- // async recovery targets, or backfill targets.
- set<pg_shard_t> acting_recovery_backfill, actingset, upset;
- map<pg_shard_t,eversion_t> peer_last_complete_ondisk;
- eversion_t min_last_complete_ondisk; // up: min over last_complete_ondisk, peer_last_complete_ondisk
- eversion_t pg_trim_to;
-
- set<int> blocked_by; ///< osds we are blocked by (for pg stats)
-
public:
bool dne() { return info.dne(); }
protected:
- /*
- * peer_info -- projected (updates _before_ replicas ack)
- * peer_missing -- committed (updates _after_ replicas ack)
- */
-
- bool need_up_thru;
- set<pg_shard_t> stray_set; // non-acting osds that have PG data.
- map<pg_shard_t, pg_info_t> peer_info; // info from peers (stray or prior)
- map<pg_shard_t, int64_t> peer_bytes; // Peer's num_bytes from peer_info
- set<pg_shard_t> peer_purged; // peers purged
- map<pg_shard_t, pg_missing_t> peer_missing;
- set<pg_shard_t> peer_log_requested; // logs i've requested (and start stamps)
- set<pg_shard_t> peer_missing_requested;
-
- // i deleted these strays; ignore racing PGInfo from them
- set<pg_shard_t> peer_activated;
-
- // primary-only, recovery-only state
- set<pg_shard_t> might_have_unfound; // These osds might have objects on them
- // which are unfound on the primary
- epoch_t last_peering_reset;
+  bool need_up_thru; ///< true if this PG needs the OSD's up_thru published in the osdmap
epoch_t get_last_peering_reset() const {
return last_peering_reset;
bool backfill_reserved;
bool backfill_reserving;
- set<pg_shard_t> backfill_targets, async_recovery_targets;
-
// The primary's num_bytes and local num_bytes for this pg, only valid
// during backfill for non-primary shards.
// Both of these are adjusted for EC to reflect the on-disk bytes
* encounter an unexpected error. FIXME.
*/
- // pg waiters
- unsigned flushes_in_progress;
-
// ops with newer maps than our (or blocked behind them)
// track these by client, since inter-request ordering doesn't otherwise
// matter.
public:
int pg_stat_adjust(osd_stat_t *new_stat);
protected:
-
- PeeringState recovery_state;
-
- uint64_t peer_features;
- uint64_t acting_features;
- uint64_t upacting_features;
-
-  epoch_t last_epoch;
+  epoch_t last_epoch = 0;
- /// most recently consumed osdmap's require_osd_version
- unsigned last_require_osd_release = 0;
bool delete_needs_sleep = false;
protected:
PG *pg)
: state_history(pg),
machine(this, cct, spgid, dpp, pl, pg, &state_history), cct(cct),
- spgid(spgid), dpp(dpp), pl(pl), pg(pg), orig_ctx(0) {
+ spgid(spgid),
+ dpp(dpp),
+ pl(pl),
+ pg(pg),
+ orig_ctx(0),
+ info(spgid),
+ pg_log(cct) {
machine.initiate();
}
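
Writing the initializer list one entry per line makes it easier to keep in
declaration order: members are constructed in the order they are declared,
regardless of how the list is written, which is why info and pg_log are
appended after orig_ctx. A minimal sketch of why the order matters, with
hypothetical names:

    struct Ordered {
      int a;
      int b;
      // Listed in declaration order; writing b before a would draw
      // -Wreorder, and b(a + 1) is only safe because a is built first.
      Ordered() : a(1), b(a + 1) {}
    };

    int main() { return Ordered().b == 2 ? 0 : 1; }
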
#include <boost/statechart/event_base.hpp>
#include <string>
+#include "PGLog.h"
#include "PGStateUtils.h"
#include "PGPeeringEvent.h"
#include "os/ObjectStore.h"
*/
boost::optional<PeeringCtx> rctx;
+public:
+ /**
+ * Peering state information
+ */
+  int role = -1; ///< 0 = primary, 1 = replica, -1 = none
+ uint64_t state = 0; ///< PG_STATE_*
+
+ pg_shard_t primary; ///< id/shard of primary
+ pg_shard_t pg_whoami; ///< my id/shard
+ pg_shard_t up_primary; ///< id/shard of primary of up set
+ vector<int> up; ///< crush mapping without temp pgs
+ set<pg_shard_t> upset; ///< up in set form
+ vector<int> acting; ///< actual acting set for the current interval
+ set<pg_shard_t> actingset; ///< acting in set form
+
+ /// union of acting, recovery, and backfill targets
+ set<pg_shard_t> acting_recovery_backfill;
+
+ bool send_notify = false; ///< True if a notify needs to be sent to the primary
+
+  bool dirty_info = false; ///< small info structure on disk out of date
+ bool dirty_big_info = false; ///< big info structure on disk out of date
+
+ pg_info_t info; ///< current pg info
+ pg_info_t last_written_info; ///< last written info
+ PastIntervals past_intervals; ///< information about prior pg mappings
+ PGLog pg_log; ///< pg log
+
+ epoch_t last_peering_reset = 0; ///< epoch of last peering reset
+
+ /// last_update that has committed; ONLY DEFINED WHEN is_active()
+ eversion_t last_update_ondisk;
+ eversion_t last_complete_ondisk; ///< last_complete that has committed.
+  eversion_t last_update_applied; ///< last_update that has been applied (readable)
+ /// last version to which rollback_info trimming has been applied
+ eversion_t last_rollback_info_trimmed_to_applied;
+
+ /// Counter to determine when pending flushes have completed
+ unsigned flushes_in_progress = 0;
+
+ /**
+ * Primary state
+ */
+ set<pg_shard_t> stray_set; ///< non-acting osds that have PG data.
+ map<pg_shard_t, pg_info_t> peer_info; ///< info from peers (stray or prior)
+ map<pg_shard_t, int64_t> peer_bytes; ///< Peer's num_bytes from peer_info
+ set<pg_shard_t> peer_purged; ///< peers purged
+ map<pg_shard_t, pg_missing_t> peer_missing; ///< peer missing sets
+  set<pg_shard_t> peer_log_requested; ///< logs I've requested (and start stamps)
+ set<pg_shard_t> peer_missing_requested; ///< missing sets requested
+
+ /// features supported by all peers
+ uint64_t peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+ /// features supported by acting set
+ uint64_t acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+ /// features supported by up and acting
+ uint64_t upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+
+  /// most recently consumed osdmap's require_osd_release
+ unsigned last_require_osd_release = 0;
+
+ vector<int> want_acting; ///< non-empty while peering needs a new acting set
+
+  /// last_complete_ondisk reported by each peer
+  map<pg_shard_t,eversion_t> peer_last_complete_ondisk;
+
+ /// up: min over last_complete_ondisk, peer_last_complete_ondisk
+ eversion_t min_last_complete_ondisk;
+ /// point to which the log should be trimmed
+ eversion_t pg_trim_to;
+
+ set<int> blocked_by; ///< osds we are blocked by (for pg stats)
+
+ /// I deleted these strays; ignore racing PGInfo from them
+ set<pg_shard_t> peer_activated;
+
+ set<pg_shard_t> backfill_targets; ///< osds to be backfilled
+ set<pg_shard_t> async_recovery_targets; ///< osds to be async recovered
+
+ /// osds which might have objects on them which are unfound on the primary
+ set<pg_shard_t> might_have_unfound;
+
+  bool deleting = false; ///< true while the PG is being removed or the OSD is shutting down
+  atomic<bool> deleted = {false}; ///< true once deletion is complete
+
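
The scalar members above carry in-class default initializers (role = -1,
state = 0, the three CEPH_FEATURES_SUPPORTED_DEFAULT masks), which is what
lets the matching entries drop out of the PG constructor's initializer list
in the PG.cc hunk above. A minimal sketch of that pattern, with hypothetical
names:

    #include <cstdint>

    struct Sketch {
      // In-class defaults apply to every constructor, so the values cannot
      // drift apart between overloads the way initializer lists can.
      int role = -1;
      uint64_t state = 0;
      unsigned flushes_in_progress = 0;

      Sketch() = default;                  // role == -1, state == 0
      explicit Sketch(int r) : role(r) {}  // state and flushes still default
    };

    int main() {
      Sketch s(0);
      return (s.role == 0 && s.state == 0) ? 0 : 1;
    }
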
public:
PeeringState(
CephContext *cct,
const char *get_current_state() const {
return state_history.get_current_state();
}
+
+ epoch_t get_last_peering_reset() const {
+ return last_peering_reset;
+ }
+
};
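
With this accessor in place, PG::get_last_peering_reset() (kept earlier in
the diff) and PeeringState::get_last_peering_reset() necessarily agree,
because PG's last_peering_reset is a reference into recovery_state. A sketch
of that equivalence, using simplified stand-ins:

    #include <cassert>

    using epoch_t = unsigned;

    struct PeeringStateSketch {
      epoch_t last_peering_reset = 0;
      epoch_t get_last_peering_reset() const { return last_peering_reset; }
    };

    struct PGSketch {
      PeeringStateSketch recovery_state;
      epoch_t &last_peering_reset{recovery_state.last_peering_reset};
      epoch_t get_last_peering_reset() const { return last_peering_reset; }
    };

    int main() {
      PGSketch pg;
      pg.recovery_state.last_peering_reset = 42;
      assert(pg.get_last_peering_reset() ==
             pg.recovery_state.get_last_peering_reset());
    }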