From e3fa38b9845a0de5c03c16b7b9f471054da069c7 Mon Sep 17 00:00:00 2001 From: "sjust@redhat.com" Date: Fri, 22 Mar 2019 16:19:18 -0700 Subject: [PATCH] osd/: move most peering state to PeeringState This patch moves the 40 something peering state variables over into PeeringState while leaving references to them in PG. The following patches will move over the users until all users are in PeeringState. Then, the PG references will be removed. A subsequent patch will also move the recovery_state member to be the last initialized and first destructed. Signed-off-by: sjust@redhat.com --- src/osd/PG.cc | 64 ++++++++++++++----- src/osd/PG.h | 133 +++++++++++++++++----------------- src/osd/PeeringState.cc | 8 ++- src/osd/PeeringState.h | 93 ++++++++++++++++++++++++++++ 4 files changed, 202 insertions(+), 96 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index e15809b3154..ed052c63db3 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -261,6 +261,52 @@ void PGPool::update(CephContext *cct, OSDMapRef map) PG::PG(OSDService *o, OSDMapRef curmap, const PGPool &_pool, spg_t p) : + recovery_state(cct, p, this, this, this), + role(recovery_state.role), + state(recovery_state.state), + primary(recovery_state.primary), + pg_whoami(recovery_state.pg_whoami), + up_primary(recovery_state.up_primary), + up(recovery_state.up), + upset(recovery_state.upset), + acting(recovery_state.acting), + actingset(recovery_state.actingset), + acting_recovery_backfill(recovery_state.acting_recovery_backfill), + send_notify(recovery_state.send_notify), + dirty_info(recovery_state.dirty_info), + dirty_big_info(recovery_state.dirty_big_info), + info(recovery_state.info), + last_written_info(recovery_state.last_written_info), + past_intervals(recovery_state.past_intervals), + pg_log(recovery_state.pg_log), + last_peering_reset(recovery_state.last_peering_reset), + last_update_ondisk(recovery_state.last_update_ondisk), + last_complete_ondisk(recovery_state.last_complete_ondisk), + 
last_update_applied(recovery_state.last_update_applied), + last_rollback_info_trimmed_to_applied(recovery_state.last_rollback_info_trimmed_to_applied), + flushes_in_progress(recovery_state.flushes_in_progress), + stray_set(recovery_state.stray_set), + peer_info(recovery_state.peer_info), + peer_bytes(recovery_state.peer_bytes), + peer_purged(recovery_state.peer_purged), + peer_missing(recovery_state.peer_missing), + peer_log_requested(recovery_state.peer_log_requested), + peer_missing_requested(recovery_state.peer_missing_requested), + peer_features(recovery_state.peer_features), + acting_features(recovery_state.acting_features), + upacting_features(recovery_state.upacting_features), + last_require_osd_release(recovery_state.last_require_osd_release), + want_acting(recovery_state.want_acting), + peer_last_complete_ondisk(recovery_state.peer_last_complete_ondisk), + min_last_complete_ondisk(recovery_state.min_last_complete_ondisk), + pg_trim_to(recovery_state.pg_trim_to), + blocked_by(recovery_state.blocked_by), + peer_activated(recovery_state.peer_activated), + backfill_targets(recovery_state.backfill_targets), + async_recovery_targets(recovery_state.async_recovery_targets), + might_have_unfound(recovery_state.might_have_unfound), + deleting(recovery_state.deleting), + deleted(recovery_state.deleted), pg_id(p), coll(p), osd(o), @@ -276,40 +322,24 @@ PG::PG(OSDService *o, OSDMapRef curmap, _pool.id, p.shard), last_persisted_osdmap(curmap->get_epoch()), - deleting(false), trace_endpoint("0.0.0.0", 0, "PG"), - dirty_info(false), dirty_big_info(false), - info(p), info_struct_v(0), - pg_log(cct), pgmeta_oid(p.make_pgmeta_oid()), missing_loc(this), stat_queue_item(this), scrub_queued(false), recovery_queued(false), recovery_ops_active(0), - role(-1), - state(0), - send_notify(false), - pg_whoami(osd->whoami, p.shard), need_up_thru(false), - last_peering_reset(0), heartbeat_peer_lock("PG::heartbeat_peer_lock"), backfill_reserved(false), backfill_reserving(false), - 
flushes_in_progress(0), pg_stats_publish_lock("PG::pg_stats_publish_lock"), pg_stats_publish_valid(false), finish_sync_event(NULL), backoff_lock("PG::backoff_lock"), scrub_after_recovery(false), - active_pushes(0), - recovery_state(cct, info.pgid, this, this, this), - peer_features(CEPH_FEATURES_SUPPORTED_DEFAULT), - acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), - upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), - last_epoch(0), - last_require_osd_release(curmap->require_osd_release) + active_pushes(0) { #ifdef PG_DEBUG_REFS osd->add_pgid(p, this); diff --git a/src/osd/PG.h b/src/osd/PG.h index 2a1d7f4df3d..6d744c5d001 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -199,9 +199,63 @@ struct PGPool { class PG : public DoutPrefixProvider, public PeeringState::PeeringListener { friend class NamedState; friend class PeeringState; + + PeeringState recovery_state; public: using PeeringCtx = PeeringState::PeeringCtx; + +protected: + /** + * Peering state information being moved to PeeringState + */ + int &role; + uint64_t &state; + pg_shard_t &primary; + pg_shard_t &pg_whoami; + pg_shard_t &up_primary; + vector &up; + set &upset; + vector &acting; + set &actingset; + set &acting_recovery_backfill; + bool &send_notify; + bool &dirty_info; + bool &dirty_big_info; + pg_info_t &info; + pg_info_t &last_written_info; + PastIntervals &past_intervals; + PGLog &pg_log; + epoch_t &last_peering_reset; + eversion_t &last_update_ondisk; + eversion_t &last_complete_ondisk; + eversion_t &last_update_applied; + eversion_t &last_rollback_info_trimmed_to_applied; + unsigned &flushes_in_progress; + set &stray_set; + map &peer_info; + map &peer_bytes; + set &peer_purged; + map &peer_missing; + set &peer_log_requested; + set &peer_missing_requested; + uint64_t &peer_features; + uint64_t &acting_features; + uint64_t &upacting_features; + unsigned &last_require_osd_release; + vector &want_acting; + map &peer_last_complete_ondisk; + eversion_t &min_last_complete_ondisk; + eversion_t 
&pg_trim_to; + set &blocked_by; + set &peer_activated; + set &backfill_targets; + set &async_recovery_targets; + set &might_have_unfound; + bool &deleting; + atomic &deleted; + +public: // -- members -- const spg_t pg_id; const coll_t coll; @@ -547,20 +601,10 @@ protected: protected: - - bool deleting; // true while in removing or OSD is shutting down - atomic deleted = {false}; - ZTracer::Endpoint trace_endpoint; protected: - bool dirty_info, dirty_big_info; - -protected: - // pg state - pg_info_t info; ///< current pg info - pg_info_t last_written_info; ///< last written info __u8 info_struct_v = 0; static const __u8 latest_struct_v = 10; // v10 is the new past_intervals encoding @@ -572,7 +616,6 @@ protected: void upgrade(ObjectStore *store); protected: - PGLog pg_log; ghobject_t pgmeta_oid; // ------------------ @@ -601,7 +644,6 @@ protected: private: - loc_count_t _get_count(const set& shards) { loc_count_t r; for (auto s : shards) { @@ -893,8 +935,6 @@ protected: } } missing_loc; - PastIntervals past_intervals; - interval_set snap_trimq; /* You should not use these items without taking their respective queue locks @@ -909,60 +949,11 @@ protected: multiset recovering_oids; #endif -protected: - int role; // 0 = primary, 1 = replica, -1=none. - uint64_t state; // PG_STATE_* - - bool send_notify; ///< true if we are non-primary and should notify the primary - -protected: - eversion_t last_update_ondisk; // last_update that has committed; ONLY DEFINED WHEN is_active() - eversion_t last_complete_ondisk; // last_complete that has committed. - eversion_t last_update_applied; - - // entries <= last_rollback_info_trimmed_to_applied have been trimmed - eversion_t last_rollback_info_trimmed_to_applied; - - // primary state -protected: - pg_shard_t primary; - pg_shard_t pg_whoami; - pg_shard_t up_primary; - vector up, acting, want_acting; - // acting_recovery_backfill contains shards that are acting, - // async recovery targets, or backfill targets. 
- set acting_recovery_backfill, actingset, upset; - map peer_last_complete_ondisk; - eversion_t min_last_complete_ondisk; // up: min over last_complete_ondisk, peer_last_complete_ondisk - eversion_t pg_trim_to; - - set blocked_by; ///< osds we are blocked by (for pg stats) - public: bool dne() { return info.dne(); } protected: - /* - * peer_info -- projected (updates _before_ replicas ack) - * peer_missing -- committed (updates _after_ replicas ack) - */ - - bool need_up_thru; - set stray_set; // non-acting osds that have PG data. - map peer_info; // info from peers (stray or prior) - map peer_bytes; // Peer's num_bytes from peer_info - set peer_purged; // peers purged - map peer_missing; - set peer_log_requested; // logs i've requested (and start stamps) - set peer_missing_requested; - - // i deleted these strays; ignore racing PGInfo from them - set peer_activated; - - // primary-only, recovery-only state - set might_have_unfound; // These osds might have objects on them - // which are unfound on the primary - epoch_t last_peering_reset; + bool need_up_thru; ///< Flag indicating that this pg needs up through published epoch_t get_last_peering_reset() const { return last_peering_reset; @@ -1067,8 +1058,6 @@ protected: bool backfill_reserved; bool backfill_reserving; - set backfill_targets, async_recovery_targets; - // The primary's num_bytes and local num_bytes for this pg, only valid // during backfill for non-primary shards. // Both of these are adjusted for EC to reflect the on-disk bytes @@ -1224,9 +1213,6 @@ protected: * encounter an unexpected error. FIXME. */ - // pg waiters - unsigned flushes_in_progress; - // ops with newer maps than our (or blocked behind them) // track these by client, since inter-request ordering doesn't otherwise // matter. 
@@ -1825,17 +1811,8 @@ protected: public: int pg_stat_adjust(osd_stat_t *new_stat); protected: - - PeeringState recovery_state; - - uint64_t peer_features; - uint64_t acting_features; - uint64_t upacting_features; - epoch_t last_epoch; - /// most recently consumed osdmap's require_osd_version - unsigned last_require_osd_release = 0; bool delete_needs_sleep = false; protected: diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index aeaa8972060..640db3fa70c 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -19,7 +19,13 @@ PeeringState::PeeringState( PG *pg) : state_history(pg), machine(this, cct, spgid, dpp, pl, pg, &state_history), cct(cct), - spgid(spgid), dpp(dpp), pl(pl), pg(pg), orig_ctx(0) { + spgid(spgid), + dpp(dpp), + pl(pl), + pg(pg), + orig_ctx(0), + info(spgid), + pg_log(cct) { machine.initiate(); } diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 91666963b93..fe143a29e0c 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -12,6 +12,7 @@ #include #include +#include "PGLog.h" #include "PGStateUtils.h" #include "PGPeeringEvent.h" #include "os/ObjectStore.h" @@ -987,6 +988,93 @@ public: */ boost::optional rctx; +public: + /** + * Peering state information + */ + int role = -1; ///< 0 = primary, 1 = replica, -1=none. 
+ uint64_t state = 0; ///< PG_STATE_* + + pg_shard_t primary; ///< id/shard of primary + pg_shard_t pg_whoami; ///< my id/shard + pg_shard_t up_primary; ///< id/shard of primary of up set + vector up; ///< crush mapping without temp pgs + set upset; ///< up in set form + vector acting; ///< actual acting set for the current interval + set actingset; ///< acting in set form + + /// union of acting, recovery, and backfill targets + set acting_recovery_backfill; + + bool send_notify = false; ///< True if a notify needs to be sent to the primary + + bool dirty_info = false; ///< small info structu on disk out of date + bool dirty_big_info = false; ///< big info structure on disk out of date + + pg_info_t info; ///< current pg info + pg_info_t last_written_info; ///< last written info + PastIntervals past_intervals; ///< information about prior pg mappings + PGLog pg_log; ///< pg log + + epoch_t last_peering_reset = 0; ///< epoch of last peering reset + + /// last_update that has committed; ONLY DEFINED WHEN is_active() + eversion_t last_update_ondisk; + eversion_t last_complete_ondisk; ///< last_complete that has committed. + eversion_t last_update_applied; ///< last_update readable + /// last version to which rollback_info trimming has been applied + eversion_t last_rollback_info_trimmed_to_applied; + + /// Counter to determine when pending flushes have completed + unsigned flushes_in_progress = 0; + + /** + * Primary state + */ + set stray_set; ///< non-acting osds that have PG data. 
+ map peer_info; ///< info from peers (stray or prior) + map peer_bytes; ///< Peer's num_bytes from peer_info + set peer_purged; ///< peers purged + map peer_missing; ///< peer missing sets + set peer_log_requested; ///< logs i've requested (and start stamps) + set peer_missing_requested; ///< missing sets requested + + /// features supported by all peers + uint64_t peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT; + /// features supported by acting set + uint64_t acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT; + /// features supported by up and acting + uint64_t upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT; + + /// most recently consumed osdmap's require_osd_version + unsigned last_require_osd_release = 0; + + vector want_acting; ///< non-empty while peering needs a new acting set + + // acting_recovery_backfill contains shards that are acting, + // async recovery targets, or backfill targets. + map peer_last_complete_ondisk; + + /// up: min over last_complete_ondisk, peer_last_complete_ondisk + eversion_t min_last_complete_ondisk; + /// point to which the log should be trimmed + eversion_t pg_trim_to; + + set blocked_by; ///< osds we are blocked by (for pg stats) + + + /// I deleted these strays; ignore racing PGInfo from them + set peer_activated; + + set backfill_targets; ///< osds to be backfilled + set async_recovery_targets; ///< osds to be async recovered + + /// osds which might have objects on them which are unfound on the primary + set might_have_unfound; + + bool deleting = false; /// true while in removing or OSD is shutting down + atomic deleted = {false}; /// true once deletion complete + public: PeeringState( CephContext *cct, @@ -1016,4 +1104,9 @@ public: const char *get_current_state() const { return state_history.get_current_state(); } + + epoch_t get_last_peering_reset() const { + return last_peering_reset; + } + }; -- 2.39.5