From 40f71cda0ed4fe78dbcbc1ba73f0cc973bf5c415 Mon Sep 17 00:00:00 2001 From: "sjust@redhat.com" Date: Wed, 27 Mar 2019 14:24:01 -0700 Subject: [PATCH] osd/: move start_peering_interval and callees into PeeringState Signed-off-by: sjust@redhat.com --- src/mon/OSDMonitor.cc | 2 +- src/osd/MissingLoc.h | 3 + src/osd/OSD.cc | 2 +- src/osd/PG.cc | 300 ++++++---------------------------------- src/osd/PG.h | 82 +++-------- src/osd/PeeringState.cc | 296 ++++++++++++++++++++++++++++++++++++++- src/osd/PeeringState.h | 37 ++++- src/osd/PrimaryLogPG.cc | 7 +- src/osd/PrimaryLogPG.h | 4 +- src/osd/osd_types.cc | 4 +- src/osd/osd_types.h | 4 +- 11 files changed, 396 insertions(+), 345 deletions(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 1758d28bbe6..b0b59296c1f 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -914,7 +914,7 @@ OSDMonitor::update_pending_pgs(const OSDMap::Incremental& inc, &nextmap, &osdmap, pgid, - &min_size_predicate, + min_size_predicate, &i.second.past_intervals, &debug)) { epoch_t e = inc.epoch; diff --git a/src/osd/MissingLoc.h b/src/osd/MissingLoc.h index 19aef8175cb..87c0d3c4cea 100644 --- a/src/osd/MissingLoc.h +++ b/src/osd/MissingLoc.h @@ -120,6 +120,9 @@ class MissingLoc { is_readable.reset(_is_readable); is_recoverable.reset(_is_recoverable); } + IsPGRecoverablePredicate &get_recoverable_predicate() { + return *is_recoverable; + } std::ostream& gen_prefix(std::ostream& out) const { return dpp->gen_prefix(out); } diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 24fc51b7306..14e3fea5767 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4742,7 +4742,7 @@ void OSD::build_initial_pg_history( osdmap.get(), lastmap.get(), pgid.pgid, - &min_size_predicate, + min_size_predicate, pi, &debug); if (new_interval) { diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 5af3b642b74..45b63bc7935 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -725,35 +725,18 @@ PastIntervals::PriorSet PG::build_prior() void 
PG::clear_primary_state() { - dout(10) << "clear_primary_state" << dendl; - - // clear peering state - stray_set.clear(); - peer_log_requested.clear(); - peer_missing_requested.clear(); - peer_info.clear(); - peer_bytes.clear(); - peer_missing.clear(); need_up_thru = false; - peer_last_complete_ondisk.clear(); - peer_activated.clear(); - min_last_complete_ondisk = eversion_t(); - pg_trim_to = eversion_t(); - might_have_unfound.clear(); projected_log = PGLog::IndexedLog(); - last_update_ondisk = eversion_t(); - snap_trimq.clear(); - - finish_sync_event = 0; // so that _finish_recovery doesn't go off in another thread - missing_loc.clear(); - release_pg_backoffs(); - pg_log.reset_recovery_pointers(); + snap_trimq.clear(); + finish_sync_event = 0; // so that _finish_recovery doesn't go off in another thread + release_pg_backoffs(); + scrubber.reserved_peers.clear(); scrub_after_recovery = false; @@ -2357,7 +2340,7 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits) vector newup, newacting; get_osdmap()->pg_to_up_acting_osds( child->info.pgid.pgid, &newup, &up_primary, &newacting, &primary); - child->init_primary_up_acting( + child->recovery_state.init_primary_up_acting( newup, newacting, up_primary, @@ -2382,7 +2365,7 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits) // release all backoffs for simplicity release_backoffs(hobject_t(), hobject_t::get_max()); - child->on_new_interval(); + child->recovery_state.on_new_interval(); child->send_notify = !child->is_primary(); @@ -3228,8 +3211,8 @@ void PG::init( << " past_intervals " << pi << dendl; - set_role(role); - init_primary_up_acting( + recovery_state.set_role(role); + recovery_state.init_primary_up_acting( newup, newacting, new_up_primary, @@ -3251,7 +3234,7 @@ void PG::init( pg_log.mark_log_for_rewrite(); } - on_new_interval(); + recovery_state.on_new_interval(); dirty_info = true; dirty_big_info = true; @@ -3717,16 +3700,16 @@ void PG::read_state(ObjectStore *store) vector 
acting, up; get_osdmap()->pg_to_up_acting_osds( pg_id.pgid, &up, &up_primary, &acting, &primary); - init_primary_up_acting( + recovery_state.init_primary_up_acting( up, acting, up_primary, primary); int rr = OSDMap::calc_pg_role(osd->whoami, acting); if (pool.info.is_replicated() || rr == pg_whoami.shard) - set_role(rr); + recovery_state.set_role(rr); else - set_role(-1); + recovery_state.set_role(-1); } PG::PeeringCtx rctx(0, 0, 0, new ObjectStore::Transaction); @@ -4103,6 +4086,34 @@ void PG::unreg_next_scrub() } } +void PG::clear_ready_to_merge() { + osd->clear_ready_to_merge(this); +} + +void PG::queue_want_pg_temp(const vector &wanted) { + osd->queue_want_pg_temp(get_pgid().pgid, wanted); +} + +void PG::clear_want_pg_temp() { + osd->remove_want_pg_temp(get_pgid().pgid); +} + +void PG::on_role_change() { + requeue_ops(waiting_for_peered); + plpg_on_role_change(); +} + +void PG::on_new_interval() { + scrub_queued = false; + projected_last_update = eversion_t(); + cancel_recovery(); + plpg_on_new_interval(); +} + +epoch_t PG::oldest_stored_osdmap() { + return osd->get_superblock().oldest_map; +} + void PG::do_replica_scrub_map(OpRequestRef op) { const MOSDRepScrubMap *m = static_cast(op->get_req()); @@ -5783,234 +5794,6 @@ bool PG::try_flush_or_schedule_async() } } -/* Called before initializing peering during advance_map */ -void PG::start_peering_interval( - const OSDMapRef lastmap, - const vector& newup, int new_up_primary, - const vector& newacting, int new_acting_primary, - ObjectStore::Transaction *t) -{ - const OSDMapRef osdmap = get_osdmap(); - - recovery_state.set_last_peering_reset(); - - vector oldacting, oldup; - int oldrole = get_role(); - - unreg_next_scrub(); - - if (is_primary()) { - osd->clear_ready_to_merge(this); - } - - pg_shard_t old_acting_primary = get_primary(); - pg_shard_t old_up_primary = up_primary; - bool was_old_primary = is_primary(); - bool was_old_replica = is_replica(); - - acting.swap(oldacting); - up.swap(oldup); - 
init_primary_up_acting( - newup, - newacting, - new_up_primary, - new_acting_primary); - - if (info.stats.up != up || - info.stats.acting != acting || - info.stats.up_primary != new_up_primary || - info.stats.acting_primary != new_acting_primary) { - info.stats.up = up; - info.stats.up_primary = new_up_primary; - info.stats.acting = acting; - info.stats.acting_primary = new_acting_primary; - info.stats.mapping_epoch = osdmap->get_epoch(); - } - - pg_stats_publish_lock.Lock(); - pg_stats_publish_valid = false; - pg_stats_publish_lock.Unlock(); - - // This will now be remapped during a backfill in cases - // that it would not have been before. - if (up != acting) - state_set(PG_STATE_REMAPPED); - else - state_clear(PG_STATE_REMAPPED); - - int role = osdmap->calc_pg_role(osd->whoami, acting, acting.size()); - if (pool.info.is_replicated() || role == pg_whoami.shard) - set_role(role); - else - set_role(-1); - - // did acting, up, primary|acker change? - if (!lastmap) { - dout(10) << " no lastmap" << dendl; - dirty_info = true; - dirty_big_info = true; - info.history.same_interval_since = osdmap->get_epoch(); - } else { - std::stringstream debug; - ceph_assert(info.history.same_interval_since != 0); - boost::scoped_ptr recoverable( - get_is_recoverable_predicate()); - bool new_interval = PastIntervals::check_new_interval( - old_acting_primary.osd, - new_acting_primary, - oldacting, newacting, - old_up_primary.osd, - new_up_primary, - oldup, newup, - info.history.same_interval_since, - info.history.last_epoch_clean, - osdmap.get(), - lastmap.get(), - info.pgid.pgid, - recoverable.get(), - &past_intervals, - &debug); - dout(10) << __func__ << ": check_new_interval output: " - << debug.str() << dendl; - if (new_interval) { - if (osdmap->get_epoch() == osd->get_superblock().oldest_map && - info.history.last_epoch_clean < osdmap->get_epoch()) { - dout(10) << " map gap, clearing past_intervals and faking" << dendl; - // our information is incomplete and useless; someone else 
was clean - // after everything we know if osdmaps were trimmed. - past_intervals.clear(); - } else { - dout(10) << " noting past " << past_intervals << dendl; - } - dirty_info = true; - dirty_big_info = true; - info.history.same_interval_since = osdmap->get_epoch(); - if (osdmap->have_pg_pool(info.pgid.pgid.pool()) && - info.pgid.pgid.is_split(lastmap->get_pg_num(info.pgid.pgid.pool()), - osdmap->get_pg_num(info.pgid.pgid.pool()), - nullptr)) { - info.history.last_epoch_split = osdmap->get_epoch(); - } - } - } - - if (old_up_primary != up_primary || - oldup != up) { - info.history.same_up_since = osdmap->get_epoch(); - } - // this comparison includes primary rank via pg_shard_t - if (old_acting_primary != get_primary()) { - info.history.same_primary_since = osdmap->get_epoch(); - } - - on_new_interval(); - - dout(1) << __func__ << " up " << oldup << " -> " << up - << ", acting " << oldacting << " -> " << acting - << ", acting_primary " << old_acting_primary << " -> " << new_acting_primary - << ", up_primary " << old_up_primary << " -> " << new_up_primary - << ", role " << oldrole << " -> " << role - << ", features acting " << acting_features - << " upacting " << upacting_features - << dendl; - - // deactivate. - state_clear(PG_STATE_ACTIVE); - state_clear(PG_STATE_PEERED); - state_clear(PG_STATE_PREMERGE); - state_clear(PG_STATE_DOWN); - state_clear(PG_STATE_RECOVERY_WAIT); - state_clear(PG_STATE_RECOVERY_TOOFULL); - state_clear(PG_STATE_RECOVERING); - - peer_purged.clear(); - acting_recovery_backfill.clear(); - scrub_queued = false; - - // reset primary/replica state? - if (was_old_primary || is_primary()) { - osd->remove_want_pg_temp(info.pgid.pgid); - } else if (was_old_replica || is_replica()) { - osd->remove_want_pg_temp(info.pgid.pgid); - } - clear_primary_state(); - - - // pg->on_* - on_change(t); - - projected_last_update = eversion_t(); - - ceph_assert(!deleting); - - // should we tell the primary we are here? 
- send_notify = !is_primary(); - - if (role != oldrole || - was_old_primary != is_primary()) { - // did primary change? - if (was_old_primary != is_primary()) { - state_clear(PG_STATE_CLEAN); - clear_publish_stats(); - } - - on_role_change(); - - // take active waiters - requeue_ops(waiting_for_peered); - - } else { - // no role change. - // did primary change? - if (get_primary() != old_acting_primary) { - dout(10) << *this << " " << oldacting << " -> " << acting - << ", acting primary " - << old_acting_primary << " -> " << get_primary() - << dendl; - } else { - // primary is the same. - if (is_primary()) { - // i am (still) primary. but my replica set changed. - state_clear(PG_STATE_CLEAN); - - dout(10) << oldacting << " -> " << acting - << ", replicas changed" << dendl; - } - } - } - cancel_recovery(); - - if (acting.empty() && !up.empty() && up_primary == pg_whoami) { - dout(10) << " acting empty, but i am up[0], clearing pg_temp" << dendl; - osd->queue_want_pg_temp(info.pgid.pgid, acting); - } -} - -void PG::on_new_interval() -{ - const OSDMapRef osdmap = get_osdmap(); - - reg_next_scrub(); - - // initialize features - acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT; - upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT; - for (vector::iterator p = acting.begin(); p != acting.end(); ++p) { - if (*p == CRUSH_ITEM_NONE) - continue; - uint64_t f = osdmap->get_xinfo(*p).features; - acting_features &= f; - upacting_features &= f; - } - for (vector::iterator p = up.begin(); p != up.end(); ++p) { - if (*p == CRUSH_ITEM_NONE) - continue; - upacting_features &= osdmap->get_xinfo(*p).features; - } - - _on_new_interval(); -} - void PG::proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &oinfo) { ceph_assert(!is_primary()); @@ -6404,9 +6187,6 @@ void PG::handle_activate_map(PeeringCtx *rctx) dout(10) << __func__ << ": " << get_osdmap()->get_epoch() << dendl; recovery_state.activate_map(rctx); - if (get_osdmap()->check_new_blacklist_entries()) { - 
check_blacklisted_watchers(); - } } void PG::handle_initialize(PeeringCtx *rctx) diff --git a/src/osd/PG.h b/src/osd/PG.h index 167471c98e8..f16f180f223 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -406,6 +406,22 @@ public: void reg_next_scrub() override; void unreg_next_scrub() override; + void clear_ready_to_merge() override; + + void queue_want_pg_temp(const vector &wanted) override; + void clear_want_pg_temp() override; + + void on_new_interval() override; + virtual void plpg_on_new_interval() = 0; + + void on_role_change() override; + virtual void plpg_on_role_change() = 0; + + void clear_publish_stats() override; + void clear_primary_state() override; + + epoch_t oldest_stored_osdmap() override; + bool is_forced_recovery_or_backfill() const { return get_state() & (PG_STATE_FORCED_RECOVERY | PG_STATE_FORCED_BACKFILL); } @@ -568,12 +584,6 @@ protected: virtual PGBackend *get_pgbackend() = 0; virtual const PGBackend* get_pgbackend() const = 0; -protected: - /*** PG ****/ - /// get_is_recoverable_predicate: caller owns returned pointer and must delete when done - IsPGRecoverablePredicate *get_is_recoverable_predicate() const { - return get_pgbackend()->get_is_recoverable_predicate(); - } protected: void requeue_map_waiters(); @@ -922,9 +932,6 @@ protected: void _update_blocked_by(); friend class TestOpsSocketHook; void publish_stats_to_osd(); - void clear_publish_stats(); - - void clear_primary_state(); bool needs_recovery() const; bool needs_backfill() const; @@ -1471,56 +1478,6 @@ protected: return (get_osdmap()->test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT)); } - void init_primary_up_acting( - const vector &newup, - const vector &newacting, - int new_up_primary, - int new_acting_primary) { - actingset.clear(); - acting = newacting; - for (uint8_t i = 0; i < acting.size(); ++i) { - if (acting[i] != CRUSH_ITEM_NONE) - actingset.insert( - pg_shard_t( - acting[i], - pool.info.is_erasure() ? 
shard_id_t(i) : shard_id_t::NO_SHARD)); - } - upset.clear(); - up = newup; - for (uint8_t i = 0; i < up.size(); ++i) { - if (up[i] != CRUSH_ITEM_NONE) - upset.insert( - pg_shard_t( - up[i], - pool.info.is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD)); - } - if (!pool.info.is_erasure()) { - up_primary = pg_shard_t(new_up_primary, shard_id_t::NO_SHARD); - primary = pg_shard_t(new_acting_primary, shard_id_t::NO_SHARD); - return; - } - up_primary = pg_shard_t(); - primary = pg_shard_t(); - for (uint8_t i = 0; i < up.size(); ++i) { - if (up[i] == new_up_primary) { - up_primary = pg_shard_t(up[i], shard_id_t(i)); - break; - } - } - for (uint8_t i = 0; i < acting.size(); ++i) { - if (acting[i] == new_acting_primary) { - primary = pg_shard_t(acting[i], shard_id_t(i)); - break; - } - } - ceph_assert(up_primary.osd == new_up_primary); - ceph_assert(primary.osd == new_acting_primary); - } - - void set_role(int r) { - role = r; - } - bool state_test(uint64_t m) const { return recovery_state.state_test(m); } void state_set(uint64_t m) { recovery_state.state_set(m); } void state_clear(uint64_t m) { recovery_state.state_clear(m); } @@ -1661,13 +1618,6 @@ protected: boost::optional roll_forward_to); bool try_flush_or_schedule_async() override; - void start_peering_interval( - const OSDMapRef lastmap, - const vector& newup, int up_primary, - const vector& newacting, int acting_primary, - ObjectStore::Transaction *t); - void on_new_interval(); - virtual void _on_new_interval() = 0; void start_flush_on_transaction( ObjectStore::Transaction *t) override; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index eab00391a48..367ee623961 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -370,6 +370,10 @@ void PeeringState::activate_map(PeeringCtx *rctx) << " while current is " << osdmap_ref->get_epoch() << dendl; } write_if_dirty(*rctx->transaction); + + if (get_osdmap()->check_new_blacklist_entries()) { + pl->check_blacklisted_watchers(); + } } void 
PeeringState::set_last_peering_reset() @@ -421,7 +425,7 @@ void PeeringState::check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap) } } -bool PG::should_restart_peering( +bool PeeringState::should_restart_peering( int newupprimary, int newactingprimary, const vector& newup, @@ -445,13 +449,293 @@ bool PG::should_restart_peering( << " newacting " << newacting << dendl; return true; } - if (!lastmap->is_up(osd->whoami) && osdmap->is_up(osd->whoami)) { - dout(10) << __func__ << " osd transitioned from down -> up" << dendl; + if (!lastmap->is_up(pg_whoami.osd) && osdmap->is_up(pg_whoami.osd)) { + psdout(10) << __func__ << " osd transitioned from down -> up" + << dendl; return true; } return false; } +/* Called before initializing peering during advance_map */ +void PeeringState::start_peering_interval( + const OSDMapRef lastmap, + const vector& newup, int new_up_primary, + const vector& newacting, int new_acting_primary, + ObjectStore::Transaction *t) +{ + const OSDMapRef osdmap = get_osdmap(); + + set_last_peering_reset(); + + vector oldacting, oldup; + int oldrole = get_role(); + + pl->unreg_next_scrub(); + if (is_primary()) { + pl->clear_ready_to_merge(); + } + + + pg_shard_t old_acting_primary = get_primary(); + pg_shard_t old_up_primary = up_primary; + bool was_old_primary = is_primary(); + bool was_old_replica = is_replica(); + + acting.swap(oldacting); + up.swap(oldup); + init_primary_up_acting( + newup, + newacting, + new_up_primary, + new_acting_primary); + + if (info.stats.up != up || + info.stats.acting != acting || + info.stats.up_primary != new_up_primary || + info.stats.acting_primary != new_acting_primary) { + info.stats.up = up; + info.stats.up_primary = new_up_primary; + info.stats.acting = acting; + info.stats.acting_primary = new_acting_primary; + info.stats.mapping_epoch = osdmap->get_epoch(); + } + + pl->clear_publish_stats(); + + // This will now be remapped during a backfill in cases + // that it would not have been before. 
+ if (up != acting) + state_set(PG_STATE_REMAPPED); + else + state_clear(PG_STATE_REMAPPED); + + int role = osdmap->calc_pg_role(pg_whoami.osd, acting, acting.size()); + if (pool.info.is_replicated() || role == pg_whoami.shard) + set_role(role); + else + set_role(-1); + + // did acting, up, primary|acker change? + if (!lastmap) { + psdout(10) << " no lastmap" << dendl; + dirty_info = true; + dirty_big_info = true; + info.history.same_interval_since = osdmap->get_epoch(); + } else { + std::stringstream debug; + ceph_assert(info.history.same_interval_since != 0); + bool new_interval = PastIntervals::check_new_interval( + old_acting_primary.osd, + new_acting_primary, + oldacting, newacting, + old_up_primary.osd, + new_up_primary, + oldup, newup, + info.history.same_interval_since, + info.history.last_epoch_clean, + osdmap.get(), + lastmap.get(), + info.pgid.pgid, + missing_loc.get_recoverable_predicate(), + &past_intervals, + &debug); + psdout(10) << __func__ << ": check_new_interval output: " + << debug.str() << dendl; + if (new_interval) { + if (osdmap->get_epoch() == pl->oldest_stored_osdmap() && + info.history.last_epoch_clean < osdmap->get_epoch()) { + psdout(10) << " map gap, clearing past_intervals and faking" << dendl; + // our information is incomplete and useless; someone else was clean + // after everything we know if osdmaps were trimmed. 
+ past_intervals.clear(); + } else { + psdout(10) << " noting past " << past_intervals << dendl; + } + dirty_info = true; + dirty_big_info = true; + info.history.same_interval_since = osdmap->get_epoch(); + if (osdmap->have_pg_pool(info.pgid.pgid.pool()) && + info.pgid.pgid.is_split(lastmap->get_pg_num(info.pgid.pgid.pool()), + osdmap->get_pg_num(info.pgid.pgid.pool()), + nullptr)) { + info.history.last_epoch_split = osdmap->get_epoch(); + } + } + } + + if (old_up_primary != up_primary || + oldup != up) { + info.history.same_up_since = osdmap->get_epoch(); + } + // this comparison includes primary rank via pg_shard_t + if (old_acting_primary != get_primary()) { + info.history.same_primary_since = osdmap->get_epoch(); + } + + pl->on_new_interval(); + pl->reg_next_scrub(); + + psdout(1) << __func__ << " up " << oldup << " -> " << up + << ", acting " << oldacting << " -> " << acting + << ", acting_primary " << old_acting_primary << " -> " + << new_acting_primary + << ", up_primary " << old_up_primary << " -> " << new_up_primary + << ", role " << oldrole << " -> " << role + << ", features acting " << acting_features + << " upacting " << upacting_features + << dendl; + + // deactivate. + state_clear(PG_STATE_ACTIVE); + state_clear(PG_STATE_PEERED); + state_clear(PG_STATE_PREMERGE); + state_clear(PG_STATE_DOWN); + state_clear(PG_STATE_RECOVERY_WAIT); + state_clear(PG_STATE_RECOVERY_TOOFULL); + state_clear(PG_STATE_RECOVERING); + + peer_purged.clear(); + acting_recovery_backfill.clear(); + + // reset primary/replica state? + if (was_old_primary || is_primary()) { + pl->clear_want_pg_temp(); + } else if (was_old_replica || is_replica()) { + pl->clear_want_pg_temp(); + } + clear_primary_state(); + + pl->on_change(t); + + ceph_assert(!deleting); + + // should we tell the primary we are here? + send_notify = !is_primary(); + + if (role != oldrole || + was_old_primary != is_primary()) { + // did primary change? 
+ if (was_old_primary != is_primary()) { + state_clear(PG_STATE_CLEAN); + } + + pl->on_role_change(); + } else { + // no role change. + // did primary change? + if (get_primary() != old_acting_primary) { + psdout(10) << oldacting << " -> " << acting + << ", acting primary " + << old_acting_primary << " -> " << get_primary() + << dendl; + } else { + // primary is the same. + if (is_primary()) { + // i am (still) primary. but my replica set changed. + state_clear(PG_STATE_CLEAN); + + psdout(10) << oldacting << " -> " << acting + << ", replicas changed" << dendl; + } + } + } + + if (acting.empty() && !up.empty() && up_primary == pg_whoami) { + psdout(10) << " acting empty, but i am up[0], clearing pg_temp" << dendl; + pl->queue_want_pg_temp(acting); + } +} + +void PeeringState::on_new_interval() +{ + const OSDMapRef osdmap = get_osdmap(); + + // initialize features + acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT; + upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT; + for (vector::iterator p = acting.begin(); p != acting.end(); ++p) { + if (*p == CRUSH_ITEM_NONE) + continue; + uint64_t f = osdmap->get_xinfo(*p).features; + acting_features &= f; + upacting_features &= f; + } + for (vector::iterator p = up.begin(); p != up.end(); ++p) { + if (*p == CRUSH_ITEM_NONE) + continue; + upacting_features &= osdmap->get_xinfo(*p).features; + } + + pl->on_new_interval(); +} + +void PeeringState::init_primary_up_acting( + const vector &newup, + const vector &newacting, + int new_up_primary, + int new_acting_primary) { + actingset.clear(); + acting = newacting; + for (uint8_t i = 0; i < acting.size(); ++i) { + if (acting[i] != CRUSH_ITEM_NONE) + actingset.insert( + pg_shard_t( + acting[i], + pool.info.is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD)); + } + upset.clear(); + up = newup; + for (uint8_t i = 0; i < up.size(); ++i) { + if (up[i] != CRUSH_ITEM_NONE) + upset.insert( + pg_shard_t( + up[i], + pool.info.is_erasure() ? 
shard_id_t(i) : shard_id_t::NO_SHARD)); + } + if (!pool.info.is_erasure()) { + up_primary = pg_shard_t(new_up_primary, shard_id_t::NO_SHARD); + primary = pg_shard_t(new_acting_primary, shard_id_t::NO_SHARD); + return; + } + up_primary = pg_shard_t(); + primary = pg_shard_t(); + for (uint8_t i = 0; i < up.size(); ++i) { + if (up[i] == new_up_primary) { + up_primary = pg_shard_t(up[i], shard_id_t(i)); + break; + } + } + for (uint8_t i = 0; i < acting.size(); ++i) { + if (acting[i] == new_acting_primary) { + primary = pg_shard_t(acting[i], shard_id_t(i)); + break; + } + } + ceph_assert(up_primary.osd == new_up_primary); + ceph_assert(primary.osd == new_acting_primary); +} + +void PeeringState::clear_primary_state() +{ + psdout(10) << "clear_primary_state" << dendl; + + // clear peering state + stray_set.clear(); + peer_log_requested.clear(); + peer_missing_requested.clear(); + peer_info.clear(); + peer_bytes.clear(); + peer_missing.clear(); + peer_last_complete_ondisk.clear(); + peer_activated.clear(); + min_last_complete_ondisk = eversion_t(); + pg_trim_to = eversion_t(); + might_have_unfound.clear(); + pl->clear_primary_state(); +} + + /*------------ Peering State Machine----------------*/ #undef dout_prefix #define dout_prefix (context< PeeringMachine >().dpp->gen_prefix(*_dout) \ @@ -529,7 +813,6 @@ PeeringState::Started::react(const IntervalFlush&) boost::statechart::result PeeringState::Started::react(const AdvMap& advmap) { PeeringState *ps = context< PeeringMachine >().state; - PG *pg = context< PeeringMachine >().pg; psdout(10) << "Started advmap" << dendl; ps->check_full_transition(advmap.lastmap, advmap.osdmap); if (ps->should_restart_peering( @@ -602,7 +885,7 @@ boost::statechart::result PeeringState::Reset::react(const AdvMap& advmap) advmap.osdmap)) { psdout(10) << "should restart peering, calling start_peering_interval again" << dendl; - pg->start_peering_interval( + ps->start_peering_interval( advmap.lastmap, advmap.newup, advmap.up_primary, 
advmap.newacting, advmap.acting_primary, @@ -778,7 +1061,7 @@ void PeeringState::Primary::exit() pg->want_acting.clear(); utime_t dur = ceph_clock_now() - enter_time; pl->get_peering_perf().tinc(rs_primary_latency, dur); - pg->clear_primary_state(); + pl->clear_primary_state(); pg->state_clear(PG_STATE_CREATING); } @@ -1815,6 +2098,7 @@ PeeringState::Active::Active(my_context ctx) boost::statechart::result PeeringState::Active::react(const AdvMap& advmap) { PG *pg = context< PeeringMachine >().pg; + PeeringState *ps = context< PeeringMachine >().state; if (ps->should_restart_peering( advmap.up_primary, advmap.acting_primary, diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 8d39db3a9a7..0dc182ac1d8 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -79,12 +79,25 @@ public: virtual PerfCounters &get_peering_perf() = 0; + virtual void clear_ready_to_merge() = 0; + + virtual void queue_want_pg_temp(const vector &wanted) = 0; + virtual void clear_want_pg_temp() = 0; + + virtual void clear_publish_stats() = 0; + virtual void check_recovery_sources(const OSDMapRef& newmap) = 0; + virtual void check_blacklisted_watchers() = 0; + virtual void clear_primary_state() = 0; + virtual void on_pool_change() = 0; virtual void on_role_change() = 0; virtual void on_change(ObjectStore::Transaction *t) = 0; virtual void on_activate() = 0; - virtual void check_blacklisted_watchers() = 0; + virtual void on_new_interval() = 0; + + virtual epoch_t oldest_stored_osdmap() = 0; + virtual ~PeeringListener() {} }; @@ -1168,7 +1181,18 @@ public: const vector& newacting, OSDMapRef lastmap, OSDMapRef osdmap); - + void start_peering_interval( + const OSDMapRef lastmap, + const vector& newup, int up_primary, + const vector& newacting, int acting_primary, + ObjectStore::Transaction *t); + void on_new_interval(); + void init_primary_up_acting( + const vector &newup, + const vector &newacting, + int new_up_primary, + int new_acting_primary); + void 
clear_primary_state(); public: PeeringState( @@ -1180,6 +1204,12 @@ public: PeeringListener *pl, PG *pg); + void set_backend_predicates( + IsPGReadablePredicate *is_readable, + IsPGRecoverablePredicate *is_recoverable) { + missing_loc.set_backend_predicates(is_readable, is_recoverable); + } + // MissingLoc::MappingInfo const set &get_upset() const override { return upset; @@ -1291,6 +1321,9 @@ public: return deleted || e < get_last_peering_reset(); } + void set_role(int r) { + role = r; + } int get_role() const { return role; } diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 9136127b799..e4307c6e91f 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -1721,7 +1721,7 @@ PrimaryLogPG::PrimaryLogPG(OSDService *o, OSDMapRef curmap, temp_seq(0), snap_trimmer_machine(this) { - missing_loc.set_backend_predicates( + recovery_state.set_backend_predicates( pgbackend->get_is_readable_predicate(), pgbackend->get_is_recoverable_predicate()); snap_trimmer_machine.initiate(); @@ -12187,9 +12187,10 @@ void PrimaryLogPG::on_activate() agent_setup(); } -void PrimaryLogPG::_on_new_interval() +void PrimaryLogPG::plpg_on_new_interval() { dout(20) << __func__ << " checking missing set deletes flag. 
missing = " << pg_log.get_missing() << dendl; + if (!pg_log.get_missing().may_include_deletes && get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES)) { pg_log.rebuild_missing_set_with_deletes(osd->store, ch, info); @@ -12307,7 +12308,7 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction *t) ceph_assert(objects_blocked_on_degraded_snap.empty()); } -void PrimaryLogPG::on_role_change() +void PrimaryLogPG::plpg_on_role_change() { dout(10) << __func__ << dendl; if (get_role() != 0 && hit_set) { diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 910a45f8d4b..c101df13cc6 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -1858,9 +1858,9 @@ public: void do_update_log_missing_reply( OpRequestRef &op); - void on_role_change() override; + void plpg_on_role_change() override; void on_pool_change() override; - void _on_new_interval() override; + void plpg_on_new_interval() override; void clear_async_reads(); void on_change(ObjectStore::Transaction *t) override; void on_activate() override; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 0e81292ad8c..5908a3e4824 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -3850,7 +3850,7 @@ bool PastIntervals::check_new_interval( const OSDMap *osdmap, const OSDMap *lastmap, pg_t pgid, - IsPGRecoverablePredicate *could_have_gone_active, + IsPGRecoverablePredicate &could_have_gone_active, PastIntervals *past_intervals, std::ostream *out) { @@ -3935,7 +3935,7 @@ bool PastIntervals::check_new_interval( if (num_acting && i.primary != -1 && num_acting >= old_pg_pool.min_size && - (*could_have_gone_active)(old_acting_shards)) { + could_have_gone_active(old_acting_shards)) { if (out) *out << __func__ << " " << i << " up_thru " << lastmap->get_up_thru(i.primary) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c0b9cf65d90..4fe9c352c19 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -3176,7 +3176,7 @@ public: const OSDMap *osdmap, ///< [in] current map 
const OSDMap *lastmap, ///< [in] last map pg_t pgid, ///< [in] pgid for pg - IsPGRecoverablePredicate *could_have_gone_active, ///< [in] predicate whether the pg can be active + IsPGRecoverablePredicate &could_have_gone_active, ///< [in] predicate whether the pg can be active PastIntervals *past_intervals, ///< [out] intervals std::ostream *out = 0 ///< [out] debug ostream ); @@ -3194,7 +3194,7 @@ public: std::shared_ptr osdmap, ///< [in] current map std::shared_ptr lastmap, ///< [in] last map pg_t pgid, ///< [in] pgid for pg - IsPGRecoverablePredicate *could_have_gone_active, ///< [in] predicate whether the pg can be active + IsPGRecoverablePredicate &could_have_gone_active, ///< [in] predicate whether the pg can be active PastIntervals *past_intervals, ///< [out] intervals std::ostream *out = 0 ///< [out] debug ostream ) { -- 2.39.5