From: Samuel Just Date: Thu, 28 Mar 2019 22:47:05 +0000 (-0700) Subject: osd/: move build_prior and related state to PeeringState X-Git-Tag: v15.1.0~2774^2~60 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=c66e46fe6b3c8f272b2e3440530155ef6ad86207;p=ceph-ci.git osd/: move build_prior and related state to PeeringState Signed-off-by: Samuel Just --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index c41420b35cd..87e2f364988 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -224,6 +224,7 @@ PG::PG(OSDService *o, OSDMapRef curmap, min_last_complete_ondisk(recovery_state.min_last_complete_ondisk), pg_trim_to(recovery_state.pg_trim_to), blocked_by(recovery_state.blocked_by), + need_up_thru(recovery_state.need_up_thru), peer_activated(recovery_state.peer_activated), backfill_targets(recovery_state.backfill_targets), async_recovery_targets(recovery_state.async_recovery_targets), @@ -251,7 +252,6 @@ PG::PG(OSDService *o, OSDMapRef curmap, scrub_queued(false), recovery_queued(false), recovery_ops_active(0), - need_up_thru(false), heartbeat_peer_lock("PG::heartbeat_peer_lock"), backfill_reserving(false), pg_stats_publish_lock("PG::pg_stats_publish_lock"), @@ -575,18 +575,6 @@ bool PG::needs_backfill() const return false; } -bool PG::adjust_need_up_thru(const OSDMapRef osdmap) -{ - epoch_t up_thru = osdmap->get_up_thru(osd->whoami); - if (need_up_thru && - up_thru >= info.history.same_interval_since) { - dout(10) << "adjust_need_up_thru now " << up_thru << ", need_up_thru now false" << dendl; - need_up_thru = false; - return true; - } - return false; -} - /* * Returns true unless there is a non-lost OSD in might_have_unfound. */ @@ -617,72 +605,11 @@ bool PG::all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const return true; } -PastIntervals::PriorSet PG::build_prior() -{ - if (1) { - // sanity check - for (map::iterator it = peer_info.begin(); - it != peer_info.end(); - ++it) { - ceph_assert(info.history.last_epoch_started >= it->second.history.last_epoch_started); - } - } - - const OSDMap &osdmap = *get_osdmap(); - PastIntervals::PriorSet prior = past_intervals.get_prior_set( - pool.info.is_erasure(), - info.history.last_epoch_started, - get_pgbackend()->get_is_recoverable_predicate(), - [&](epoch_t start, int osd, epoch_t *lost_at) { - const osd_info_t *pinfo = 0; - if (osdmap.exists(osd)) { - pinfo = &osdmap.get_info(osd); - if (lost_at) - *lost_at = pinfo->lost_at; - } - - if (osdmap.is_up(osd)) { - return PastIntervals::UP; - } else if (!pinfo) { - return PastIntervals::DNE; - } else if (pinfo->lost_at > start) { - return PastIntervals::LOST; - } else { - return PastIntervals::DOWN; - } - }, - up, - acting, - this); - - if (prior.pg_down) { - state_set(PG_STATE_DOWN); - } - - if (get_osdmap()->get_up_thru(osd->whoami) < info.history.same_interval_since) { - dout(10) << "up_thru " << get_osdmap()->get_up_thru(osd->whoami) - << " < same_since " << info.history.same_interval_since - << ", must notify monitor" << dendl; - need_up_thru = true; - } else { - dout(10) << "up_thru " << get_osdmap()->get_up_thru(osd->whoami) - << " >= same_since " << info.history.same_interval_since - << ", all is well" << dendl; - need_up_thru = false; - } - set_probe_targets(prior.probe); - return prior; -} - void PG::clear_primary_state() { - need_up_thru = false; - projected_log = PGLog::IndexedLog(); last_update_ondisk = eversion_t(); - missing_loc.clear(); - - pg_log.reset_recovery_pointers(); + projected_log = PGLog::IndexedLog(); snap_trimq.clear(); finish_sync_event = 0; // so that _finish_recovery doesn't go off in another thread diff --git a/src/osd/PG.h b/src/osd/PG.h index e29903cad31..1cf0b0d5b02 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -220,6 +220,7 @@ protected: eversion_t &min_last_complete_ondisk; eversion_t &pg_trim_to; set &blocked_by; + bool &need_up_thru; set &peer_activated; set &backfill_targets; set &async_recovery_targets; @@ -285,7 +286,7 @@ public: return info.history; } bool get_need_up_thru() const { - return need_up_thru; + return recovery_state.get_need_up_thru(); } epoch_t get_same_interval_since() const { return info.history.same_interval_since; @@ -652,15 +653,13 @@ public: virtual void send_cluster_message(int osd, Message *m, epoch_t epoch); protected: - bool need_up_thru; ///< Flag indicating that this pg needs up through published - epoch_t get_last_peering_reset() const { return last_peering_reset; } /* heartbeat peers */ - void set_probe_targets(const set &probe_set); - void clear_probe_targets(); + void set_probe_targets(const set &probe_set) override; + void clear_probe_targets() override; Mutex heartbeat_peer_lock; set heartbeat_peers; @@ -962,10 +961,6 @@ protected: void try_mark_clean(); ///< mark an active pg clean - PastIntervals::PriorSet build_prior(); - - bool adjust_need_up_thru(const OSDMapRef osdmap); - bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const; virtual void dump_recovery_info(Formatter *f) const = 0; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 8c67af3c97e..bc0b59135b7 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -729,6 +729,9 @@ void PeeringState::clear_primary_state() min_last_complete_ondisk = eversion_t(); pg_trim_to = eversion_t(); might_have_unfound.clear(); + need_up_thru = false; + missing_loc.clear(); + pg_log.reset_recovery_pointers(); pl->clear_primary_state(); } @@ -950,6 +953,79 @@ bool PeeringState::set_force_backfill(bool b) return did; } +bool PeeringState::adjust_need_up_thru(const OSDMapRef osdmap) +{ + epoch_t up_thru = osdmap->get_up_thru(pg_whoami.osd); + if (need_up_thru && + up_thru >= info.history.same_interval_since) { + psdout(10) << "adjust_need_up_thru now " + << up_thru << ", need_up_thru now false" << dendl; + need_up_thru = false; + return true; + } + return false; +} + +PastIntervals::PriorSet PeeringState::build_prior() +{ + if (1) { + // sanity check + for (map::iterator it = peer_info.begin(); + it != peer_info.end(); + ++it) { + ceph_assert(info.history.last_epoch_started >= + it->second.history.last_epoch_started); + } + } + + const OSDMap &osdmap = *get_osdmap(); + PastIntervals::PriorSet prior = past_intervals.get_prior_set( + pool.info.is_erasure(), + info.history.last_epoch_started, + &missing_loc.get_recoverable_predicate(), + [&](epoch_t start, int osd, epoch_t *lost_at) { + const osd_info_t *pinfo = 0; + if (osdmap.exists(osd)) { + pinfo = &osdmap.get_info(osd); + if (lost_at) + *lost_at = pinfo->lost_at; + } + + if (osdmap.is_up(osd)) { + return PastIntervals::UP; + } else if (!pinfo) { + return PastIntervals::DNE; + } else if (pinfo->lost_at > start) { + return PastIntervals::LOST; + } else { + return PastIntervals::DOWN; + } + }, + up, + acting, + dpp); + + if (prior.pg_down) { + state_set(PG_STATE_DOWN); + } + + if (get_osdmap()->get_up_thru(pg_whoami.osd) < + info.history.same_interval_since) { + psdout(10) << "up_thru " << get_osdmap()->get_up_thru(pg_whoami.osd) + << " < same_since " << info.history.same_interval_since + << ", must notify monitor" << dendl; + need_up_thru = true; + } else { + psdout(10) << "up_thru " << get_osdmap()->get_up_thru(pg_whoami.osd) + << " >= same_since " << info.history.same_interval_since + << ", all is well" << dendl; + need_up_thru = false; + } + pl->set_probe_targets(prior.probe); + return prior; +} + + /*------------ Peering State Machine----------------*/ #undef dout_prefix #define dout_prefix (context< PeeringMachine >().dpp->gen_prefix(*_dout) \ @@ -1279,25 +1355,25 @@ PeeringState::Peering::Peering(my_context ctx) history_les_bound(false) { context< PeeringMachine >().log_enter(state_name); + PeeringState *ps = context< PeeringMachine >().state; - PG *pg = context< PeeringMachine >().pg; - ceph_assert(!pg->is_peered()); - ceph_assert(!pg->is_peering()); - ceph_assert(pg->is_primary()); - pg->state_set(PG_STATE_PEERING); + ceph_assert(!ps->is_peered()); + ceph_assert(!ps->is_peering()); + ceph_assert(ps->is_primary()); + ps->state_set(PG_STATE_PEERING); } boost::statechart::result PeeringState::Peering::react(const AdvMap& advmap) { - PG *pg = context< PeeringMachine >().pg; + PeeringState *ps = context< PeeringMachine >().state; psdout(10) << "Peering advmap" << dendl; - if (prior_set.affected_by_map(*(advmap.osdmap), pg)) { + if (prior_set.affected_by_map(*(advmap.osdmap), ps->dpp)) { psdout(1) << "Peering, affected_by_map, going to Reset" << dendl; post_event(advmap); return transit< Reset >(); } - pg->adjust_need_up_thru(advmap.osdmap); + ps->adjust_need_up_thru(advmap.osdmap); return forward_event(); } @@ -3041,9 +3117,9 @@ PeeringState::GetInfo::GetInfo(my_context ctx) ps->check_past_interval_bounds(); PastIntervals::PriorSet &prior_set = context< Peering >().prior_set; - ceph_assert(pg->blocked_by.empty()); + ceph_assert(ps->blocked_by.empty()); - prior_set = pg->build_prior(); + prior_set = ps->build_prior(); pg->reset_min_peer_features(); get_infos(); @@ -3109,7 +3185,7 @@ boost::statechart::result PeeringState::GetInfo::react(const MNotifyRec& infoevt PastIntervals::PriorSet &prior_set = context< Peering >().prior_set; if (old_start < pg->info.history.last_epoch_started) { psdout(10) << " last_epoch_started moved forward, rebuilding prior" << dendl; - prior_set = pg->build_prior(); + prior_set = ps->build_prior(); // filter out any osds that got dropped from the probe set from // peer_info_requested. this is less expensive than restarting diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 90ea77500e5..318ebe51481 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -95,6 +95,9 @@ public: PGPeeringEventRef on_preempt) = 0; virtual void cancel_remote_recovery_reservation() = 0; + // HB + virtual void set_probe_targets(const set &probe_set) = 0; + virtual void clear_probe_targets() = 0; virtual PerfCounters &get_peering_perf() = 0; @@ -1165,6 +1168,7 @@ public: set blocked_by; ///< osds we are blocked by (for pg stats) + bool need_up_thru = false; ///< true if osdmap with updated up_thru needed /// I deleted these strays; ignore racing PGInfo from them set peer_activated; @@ -1230,6 +1234,9 @@ public: /// get priority for pg deletion unsigned get_delete_priority(); + bool adjust_need_up_thru(const OSDMapRef osdmap); + PastIntervals::PriorSet build_prior(); + public: PeeringState( CephContext *cct, @@ -1403,6 +1410,10 @@ public: bool is_repair() const { return state_test(PG_STATE_REPAIR); } bool is_empty() const { return info.last_update == eversion_t(0,0); } + bool get_need_up_thru() const { + return need_up_thru; + } + bool is_forced_recovery_or_backfill() const { return get_state() & (PG_STATE_FORCED_RECOVERY | PG_STATE_FORCED_BACKFILL); }