PG::PG(OSDService *o, OSDMapRef curmap,
const PGPool &_pool, spg_t p) :
+ recovery_state(cct, p, this, this, this),
+ role(recovery_state.role),
+ state(recovery_state.state),
+ primary(recovery_state.primary),
+ pg_whoami(recovery_state.pg_whoami),
+ up_primary(recovery_state.up_primary),
+ up(recovery_state.up),
+ upset(recovery_state.upset),
+ acting(recovery_state.acting),
+ actingset(recovery_state.actingset),
+ acting_recovery_backfill(recovery_state.acting_recovery_backfill),
+ send_notify(recovery_state.send_notify),
+ dirty_info(recovery_state.dirty_info),
+ dirty_big_info(recovery_state.dirty_big_info),
+ info(recovery_state.info),
+ last_written_info(recovery_state.last_written_info),
+ past_intervals(recovery_state.past_intervals),
+ pg_log(recovery_state.pg_log),
+ last_peering_reset(recovery_state.last_peering_reset),
+ last_update_ondisk(recovery_state.last_update_ondisk),
+ last_complete_ondisk(recovery_state.last_complete_ondisk),
+ last_update_applied(recovery_state.last_update_applied),
+ last_rollback_info_trimmed_to_applied(recovery_state.last_rollback_info_trimmed_to_applied),
+ flushes_in_progress(recovery_state.flushes_in_progress),
+ stray_set(recovery_state.stray_set),
+ peer_info(recovery_state.peer_info),
+ peer_bytes(recovery_state.peer_bytes),
+ peer_purged(recovery_state.peer_purged),
+ peer_missing(recovery_state.peer_missing),
+ peer_log_requested(recovery_state.peer_log_requested),
+ peer_missing_requested(recovery_state.peer_missing_requested),
+ peer_features(recovery_state.peer_features),
+ acting_features(recovery_state.acting_features),
+ upacting_features(recovery_state.upacting_features),
+ last_require_osd_release(recovery_state.last_require_osd_release),
+ want_acting(recovery_state.want_acting),
+ peer_last_complete_ondisk(recovery_state.peer_last_complete_ondisk),
+ min_last_complete_ondisk(recovery_state.min_last_complete_ondisk),
+ pg_trim_to(recovery_state.pg_trim_to),
+ blocked_by(recovery_state.blocked_by),
+ peer_activated(recovery_state.peer_activated),
+ backfill_targets(recovery_state.backfill_targets),
+ async_recovery_targets(recovery_state.async_recovery_targets),
+ might_have_unfound(recovery_state.might_have_unfound),
+ deleting(recovery_state.deleting),
+ deleted(recovery_state.deleted),
pg_id(p),
coll(p),
osd(o),
_pool.id,
p.shard),
last_persisted_osdmap(curmap->get_epoch()),
- deleting(false),
trace_endpoint("0.0.0.0", 0, "PG"),
- dirty_info(false), dirty_big_info(false),
- info(p),
info_struct_v(0),
- pg_log(cct),
pgmeta_oid(p.make_pgmeta_oid()),
missing_loc(this),
stat_queue_item(this),
scrub_queued(false),
recovery_queued(false),
recovery_ops_active(0),
- role(-1),
- state(0),
- send_notify(false),
- pg_whoami(osd->whoami, p.shard),
need_up_thru(false),
- last_peering_reset(0),
heartbeat_peer_lock("PG::heartbeat_peer_lock"),
backfill_reserved(false),
backfill_reserving(false),
- flushes_in_progress(0),
pg_stats_publish_lock("PG::pg_stats_publish_lock"),
pg_stats_publish_valid(false),
finish_sync_event(NULL),
backoff_lock("PG::backoff_lock"),
scrub_after_recovery(false),
- active_pushes(0),
- recovery_state(cct, info.pgid, this, this, this),
- peer_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
- acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
- upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
- last_epoch(0),
- last_require_osd_release(curmap->require_osd_release)
+ active_pushes(0)
{
#ifdef PG_DEBUG_REFS
osd->add_pgid(p, this);
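
C++ initializes members in declaration order, not initializer-list order, so
recovery_state is declared ahead of the reference members it backs (see the
PG.h hunk below) and is fully constructed before any of the references bind
to its fields. A minimal sketch of the pattern, with hypothetical names:

    struct State {
      int role = -1;
      bool dirty_info = false;
    };

    struct Owner {
      State state;        // declared first, so constructed first
      int &role;          // aliases state.role; old call sites keep compiling
      bool &dirty_info;   // aliases state.dirty_info

      Owner() : role(state.role), dirty_info(state.dirty_info) {}
    };

    int main() {
      Owner o;
      o.role = 0;                        // writes through the reference
      return o.state.role == 0 ? 0 : 1;
    }
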
class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
friend class NamedState;
friend class PeeringState;
+
+ PeeringState recovery_state;
public:
using PeeringCtx = PeeringState::PeeringCtx;
+
+protected:
+  /**
+   * Peering state information now owned by PeeringState; the references
+   * below keep existing PG code compiling while the move is completed.
+   */
+ int &role;
+ uint64_t &state;
+ pg_shard_t &primary;
+ pg_shard_t &pg_whoami;
+ pg_shard_t &up_primary;
+ vector<int> &up;
+ set<pg_shard_t> &upset;
+ vector<int> &acting;
+ set<pg_shard_t> &actingset;
+ set<pg_shard_t> &acting_recovery_backfill;
+ bool &send_notify;
+ bool &dirty_info;
+ bool &dirty_big_info;
+ pg_info_t &info;
+ pg_info_t &last_written_info;
+ PastIntervals &past_intervals;
+ PGLog &pg_log;
+ epoch_t &last_peering_reset;
+ eversion_t &last_update_ondisk;
+ eversion_t &last_complete_ondisk;
+ eversion_t &last_update_applied;
+ eversion_t &last_rollback_info_trimmed_to_applied;
+ unsigned &flushes_in_progress;
+ set<pg_shard_t> &stray_set;
+ map<pg_shard_t, pg_info_t> &peer_info;
+ map<pg_shard_t, int64_t> &peer_bytes;
+ set<pg_shard_t> &peer_purged;
+ map<pg_shard_t, pg_missing_t> &peer_missing;
+ set<pg_shard_t> &peer_log_requested;
+ set<pg_shard_t> &peer_missing_requested;
+ uint64_t &peer_features;
+ uint64_t &acting_features;
+ uint64_t &upacting_features;
+ unsigned &last_require_osd_release;
+ vector<int> &want_acting;
+ map<pg_shard_t,eversion_t> &peer_last_complete_ondisk;
+ eversion_t &min_last_complete_ondisk;
+ eversion_t &pg_trim_to;
+ set<int> &blocked_by;
+ set<pg_shard_t> &peer_activated;
+ set<pg_shard_t> &backfill_targets;
+ set<pg_shard_t> &async_recovery_targets;
+ set<pg_shard_t> &might_have_unfound;
+ bool &deleting;
+ atomic<bool> &deleted;
+
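
Binding these as references keeps every existing use of fields like info,
pg_log, or acting in PG compiling unchanged while PeeringState owns the
storage. One side effect: a class with reference members has its copy and
move assignment operators implicitly deleted, since references cannot be
reseated. A small sketch of that constraint, with hypothetical names:

    #include <type_traits>

    struct Backing { int value = 0; };

    struct Facade {
      Backing backing;
      int &value;                        // reference member
      Facade() : value(backing.value) {}
    };

    // The reference member suppresses the implicit assignment operators.
    static_assert(!std::is_copy_assignable<Facade>::value,
                  "references cannot be reseated");

    int main() {
      Facade f;
      f.value = 42;                      // writes through to f.backing.value
      return f.backing.value == 42 ? 0 : 1;
    }
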
+public:
// -- members --
const spg_t pg_id;
const coll_t coll;
protected:
-
- bool deleting; // true while in removing or OSD is shutting down
- atomic<bool> deleted = {false};
-
ZTracer::Endpoint trace_endpoint;
protected:
- bool dirty_info, dirty_big_info;
-
-protected:
- // pg state
- pg_info_t info; ///< current pg info
- pg_info_t last_written_info; ///< last written info
__u8 info_struct_v = 0;
static const __u8 latest_struct_v = 10;
// v10 is the new past_intervals encoding
void upgrade(ObjectStore *store);
protected:
- PGLog pg_log;
ghobject_t pgmeta_oid;
// ------------------
private:
-
loc_count_t _get_count(const set<pg_shard_t>& shards) {
loc_count_t r;
for (auto s : shards) {
}
} missing_loc;
- PastIntervals past_intervals;
-
interval_set<snapid_t> snap_trimq;
/* You should not use these items without taking their respective queue locks
multiset<hobject_t> recovering_oids;
#endif
-protected:
- int role; // 0 = primary, 1 = replica, -1=none.
- uint64_t state; // PG_STATE_*
-
- bool send_notify; ///< true if we are non-primary and should notify the primary
-
-protected:
- eversion_t last_update_ondisk; // last_update that has committed; ONLY DEFINED WHEN is_active()
- eversion_t last_complete_ondisk; // last_complete that has committed.
- eversion_t last_update_applied;
-
- // entries <= last_rollback_info_trimmed_to_applied have been trimmed
- eversion_t last_rollback_info_trimmed_to_applied;
-
- // primary state
-protected:
- pg_shard_t primary;
- pg_shard_t pg_whoami;
- pg_shard_t up_primary;
- vector<int> up, acting, want_acting;
- // acting_recovery_backfill contains shards that are acting,
- // async recovery targets, or backfill targets.
- set<pg_shard_t> acting_recovery_backfill, actingset, upset;
- map<pg_shard_t,eversion_t> peer_last_complete_ondisk;
- eversion_t min_last_complete_ondisk; // up: min over last_complete_ondisk, peer_last_complete_ondisk
- eversion_t pg_trim_to;
-
- set<int> blocked_by; ///< osds we are blocked by (for pg stats)
-
public:
bool dne() { return info.dne(); }
protected:
- /*
- * peer_info -- projected (updates _before_ replicas ack)
- * peer_missing -- committed (updates _after_ replicas ack)
- */
-
- bool need_up_thru;
- set<pg_shard_t> stray_set; // non-acting osds that have PG data.
- map<pg_shard_t, pg_info_t> peer_info; // info from peers (stray or prior)
- map<pg_shard_t, int64_t> peer_bytes; // Peer's num_bytes from peer_info
- set<pg_shard_t> peer_purged; // peers purged
- map<pg_shard_t, pg_missing_t> peer_missing;
- set<pg_shard_t> peer_log_requested; // logs i've requested (and start stamps)
- set<pg_shard_t> peer_missing_requested;
-
- // i deleted these strays; ignore racing PGInfo from them
- set<pg_shard_t> peer_activated;
-
- // primary-only, recovery-only state
- set<pg_shard_t> might_have_unfound; // These osds might have objects on them
- // which are unfound on the primary
- epoch_t last_peering_reset;
+  bool need_up_thru; ///< true if this PG needs the OSD's up_thru published in the osdmap
epoch_t get_last_peering_reset() const {
return last_peering_reset;
bool backfill_reserved;
bool backfill_reserving;
- set<pg_shard_t> backfill_targets, async_recovery_targets;
-
// The primary's num_bytes and local num_bytes for this pg, only valid
// during backfill for non-primary shards.
// Both of these are adjusted for EC to reflect the on-disk bytes
* encounter an unexpected error. FIXME.
*/
- // pg waiters
- unsigned flushes_in_progress;
-
// ops with newer maps than our (or blocked behind them)
// track these by client, since inter-request ordering doesn't otherwise
// matter.
public:
int pg_stat_adjust(osd_stat_t *new_stat);
protected:
-
- PeeringState recovery_state;
-
- uint64_t peer_features;
- uint64_t acting_features;
- uint64_t upacting_features;
-
-  epoch_t last_epoch;
+  epoch_t last_epoch = 0;
- /// most recently consumed osdmap's require_osd_version
- unsigned last_require_osd_release = 0;
bool delete_needs_sleep = false;
protected:
PG *pg)
: state_history(pg),
machine(this, cct, spgid, dpp, pl, pg, &state_history), cct(cct),
- spgid(spgid), dpp(dpp), pl(pl), pg(pg), orig_ctx(0) {
+ spgid(spgid),
+ dpp(dpp),
+ pl(pl),
+ pg(pg),
+ orig_ctx(0),
+ info(spgid),
+ pg_log(cct) {
machine.initiate();
}
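
Writing the initializer list one entry per line makes it easier to keep in
declaration order: members are constructed in the order they are declared,
regardless of how the list is written, which is why info and pg_log are
appended after orig_ctx. A minimal sketch of why the order matters, with
hypothetical names:

    struct Ordered {
      int a;
      int b;
      // Listed in declaration order; writing b before a would draw
      // -Wreorder, and b(a + 1) is only safe because a is built first.
      Ordered() : a(1), b(a + 1) {}
    };

    int main() { return Ordered().b == 2 ? 0 : 1; }
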
#include <boost/statechart/event_base.hpp>
#include <string>
+#include "PGLog.h"
#include "PGStateUtils.h"
#include "PGPeeringEvent.h"
#include "os/ObjectStore.h"
*/
boost::optional<PeeringCtx> rctx;
+public:
+ /**
+ * Peering state information
+ */
+  int role = -1; ///< 0 = primary, 1 = replica, -1 = none
+ uint64_t state = 0; ///< PG_STATE_*
+
+ pg_shard_t primary; ///< id/shard of primary
+ pg_shard_t pg_whoami; ///< my id/shard
+ pg_shard_t up_primary; ///< id/shard of primary of up set
+ vector<int> up; ///< crush mapping without temp pgs
+ set<pg_shard_t> upset; ///< up in set form
+ vector<int> acting; ///< actual acting set for the current interval
+ set<pg_shard_t> actingset; ///< acting in set form
+
+ /// union of acting, recovery, and backfill targets
+ set<pg_shard_t> acting_recovery_backfill;
+
+ bool send_notify = false; ///< True if a notify needs to be sent to the primary
+
+  bool dirty_info = false; ///< small info structure on disk out of date
+ bool dirty_big_info = false; ///< big info structure on disk out of date
+
+ pg_info_t info; ///< current pg info
+ pg_info_t last_written_info; ///< last written info
+ PastIntervals past_intervals; ///< information about prior pg mappings
+ PGLog pg_log; ///< pg log
+
+ epoch_t last_peering_reset = 0; ///< epoch of last peering reset
+
+ /// last_update that has committed; ONLY DEFINED WHEN is_active()
+ eversion_t last_update_ondisk;
+ eversion_t last_complete_ondisk; ///< last_complete that has committed.
+  eversion_t last_update_applied; ///< last_update that has been applied (readable)
+ /// last version to which rollback_info trimming has been applied
+ eversion_t last_rollback_info_trimmed_to_applied;
+
+ /// Counter to determine when pending flushes have completed
+ unsigned flushes_in_progress = 0;
+
+ /**
+ * Primary state
+ */
+ set<pg_shard_t> stray_set; ///< non-acting osds that have PG data.
+ map<pg_shard_t, pg_info_t> peer_info; ///< info from peers (stray or prior)
+ map<pg_shard_t, int64_t> peer_bytes; ///< Peer's num_bytes from peer_info
+ set<pg_shard_t> peer_purged; ///< peers purged
+ map<pg_shard_t, pg_missing_t> peer_missing; ///< peer missing sets
+  set<pg_shard_t> peer_log_requested; ///< logs I've requested (and start stamps)
+ set<pg_shard_t> peer_missing_requested; ///< missing sets requested
+
+ /// features supported by all peers
+ uint64_t peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+ /// features supported by acting set
+ uint64_t acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+ /// features supported by up and acting
+ uint64_t upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+
+  /// most recently consumed osdmap's require_osd_release
+ unsigned last_require_osd_release = 0;
+
+ vector<int> want_acting; ///< non-empty while peering needs a new acting set
+
+  /// last_complete_ondisk reported by each peer
+  map<pg_shard_t,eversion_t> peer_last_complete_ondisk;
+
+ /// up: min over last_complete_ondisk, peer_last_complete_ondisk
+ eversion_t min_last_complete_ondisk;
+ /// point to which the log should be trimmed
+ eversion_t pg_trim_to;
+
+ set<int> blocked_by; ///< osds we are blocked by (for pg stats)
+
+ /// I deleted these strays; ignore racing PGInfo from them
+ set<pg_shard_t> peer_activated;
+
+ set<pg_shard_t> backfill_targets; ///< osds to be backfilled
+ set<pg_shard_t> async_recovery_targets; ///< osds to be async recovered
+
+ /// osds which might have objects on them which are unfound on the primary
+ set<pg_shard_t> might_have_unfound;
+
+  bool deleting = false; ///< true while the PG is being removed or the OSD is shutting down
+  atomic<bool> deleted = {false}; ///< true once deletion is complete
+
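
The scalar members above carry in-class default initializers (role = -1,
state = 0, the three CEPH_FEATURES_SUPPORTED_DEFAULT masks), which is what
lets the matching entries drop out of the PG constructor's initializer list
in the PG.cc hunk above. A minimal sketch of that pattern, with hypothetical
names:

    #include <cstdint>

    struct Sketch {
      // In-class defaults apply to every constructor, so the values cannot
      // drift apart between overloads the way initializer lists can.
      int role = -1;
      uint64_t state = 0;
      unsigned flushes_in_progress = 0;

      Sketch() = default;                  // role == -1, state == 0
      explicit Sketch(int r) : role(r) {}  // state and flushes still default
    };

    int main() {
      Sketch s(0);
      return (s.role == 0 && s.state == 0) ? 0 : 1;
    }
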
public:
PeeringState(
CephContext *cct,
const char *get_current_state() const {
return state_history.get_current_state();
}
+
+ epoch_t get_last_peering_reset() const {
+ return last_peering_reset;
+ }
+
};
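
With this accessor in place, PG::get_last_peering_reset() (kept earlier in
the diff) and PeeringState::get_last_peering_reset() necessarily agree,
because PG's last_peering_reset is a reference into recovery_state. A sketch
of that equivalence, using simplified stand-ins:

    #include <cassert>

    using epoch_t = unsigned;

    struct PeeringStateSketch {
      epoch_t last_peering_reset = 0;
      epoch_t get_last_peering_reset() const { return last_peering_reset; }
    };

    struct PGSketch {
      PeeringStateSketch recovery_state;
      epoch_t &last_peering_reset{recovery_state.last_peering_reset};
      epoch_t get_last_peering_reset() const { return last_peering_reset; }
    };

    int main() {
      PGSketch pg;
      pg.recovery_state.last_peering_reset = 42;
      assert(pg.get_last_peering_reset() ==
             pg.recovery_state.get_last_peering_reset());
    }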