From 7cbbb6065f489de6bf9faf08fac0c3386d89f945 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 29 Mar 2019 11:06:26 -0700 Subject: [PATCH] osd/: move Active purged_snaps handling back to PG Note: this patch only sets dirty_info/dirty_big_info when info.purged_snaps is actually changed. I *think* that's right, but that part deserves particular attention during review. Signed-off-by: Samuel Just --- src/osd/PG.cc | 102 +++++++++++++++++++++++++++++++++++ src/osd/PG.h | 3 ++ src/osd/PeeringState.cc | 114 +++++----------------------------------- src/osd/PeeringState.h | 19 +++++++ 4 files changed, 136 insertions(+), 102 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 87e2f364988..08ba2a7b2b0 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -3911,6 +3911,108 @@ void PG::cancel_remote_recovery_reservation() { pg_id); } +void PG::on_active_advmap(const OSDMapRef &osdmap) +{ + if (osdmap->require_osd_release >= CEPH_RELEASE_MIMIC) { + const auto& new_removed_snaps = osdmap->get_new_removed_snaps(); + auto i = new_removed_snaps.find(get_pgid().pool()); + if (i != new_removed_snaps.end()) { + bool bad = false; + for (auto j : i->second) { + if (snap_trimq.intersects(j.first, j.second)) { + decltype(snap_trimq) added, overlap; + added.insert(j.first, j.second); + overlap.intersection_of(snap_trimq, added); + if (recovery_state.get_last_require_osd_release() < CEPH_RELEASE_MIMIC) { + derr << __func__ << " removed_snaps already contains " + << overlap << ", but this is the first mimic+ osdmap," + << " so it's expected" << dendl; + } else { + derr << __func__ << " removed_snaps already contains " + << overlap << dendl; + bad = true; + } + snap_trimq.union_of(added); + } else { + snap_trimq.insert(j.first, j.second); + } + } + if (recovery_state.get_last_require_osd_release() < CEPH_RELEASE_MIMIC) { + // at upgrade, we report *all* previously removed snaps as removed in + // the first mimic epoch. remove the ones we previously divined were + // removed (and subsequently purged) from the trimq. + derr << __func__ << " first mimic map, filtering purged_snaps" + << " from new removed_snaps" << dendl; + snap_trimq.subtract(recovery_state.get_info().purged_snaps); + } + dout(10) << __func__ << " new removed_snaps " << i->second + << ", snap_trimq now " << snap_trimq << dendl; + ceph_assert(!bad || !cct->_conf->osd_debug_verify_cached_snaps); + } + + const auto& new_purged_snaps = osdmap->get_new_purged_snaps(); + auto j = new_purged_snaps.find(get_pgid().pgid.pool()); + if (j != new_purged_snaps.end()) { + bool bad = false; + for (auto k : j->second) { + if (!recovery_state.get_info().purged_snaps.contains(k.first, k.second)) { + interval_set rm, overlap; + rm.insert(k.first, k.second); + overlap.intersection_of(recovery_state.get_info().purged_snaps, rm); + derr << __func__ << " purged_snaps does not contain " + << rm << ", only " << overlap << dendl; + recovery_state.adjust_purged_snaps( + [&overlap](auto &purged_snaps) { + purged_snaps.subtract(overlap); + }); + // This can currently happen in the normal (if unlikely) course of + // events. Because adding snaps to purged_snaps does not increase + // the pg version or add a pg log entry, we don't reliably propagate + // purged_snaps additions to other OSDs. + // One example: + // - purge S + // - primary and replicas update purged_snaps + // - no object updates + // - pg mapping changes, new primary on different node + // - new primary pg version == eversion_t(), so info is not + // propagated. + //bad = true; + } else { + recovery_state.adjust_purged_snaps( + [&k](auto &purged_snaps) { + purged_snaps.erase(k.first, k.second); + }); + } + } + dout(10) << __func__ << " new purged_snaps " << j->second + << ", now " << recovery_state.get_info().purged_snaps << dendl; + ceph_assert(!bad || !cct->_conf->osd_debug_verify_cached_snaps); + } + } else if (!pool.newly_removed_snaps.empty()) { + snap_trimq.union_of(pool.newly_removed_snaps); + dout(10) << " snap_trimq now " << snap_trimq << dendl; + } +} + +void PG::on_active_actmap() +{ + if (cct->_conf->osd_check_for_log_corruption) + check_log_for_corruption(osd->store); + + + if (recovery_state.is_active()) { + dout(10) << "Active: kicking snap trim" << dendl; + kick_snap_trim(); + } + + if (recovery_state.is_peered() && + !recovery_state.is_clean() && + !recovery_state.get_osdmap()->test_flag(CEPH_OSDMAP_NOBACKFILL) && + (!recovery_state.get_osdmap()->test_flag(CEPH_OSDMAP_NOREBALANCE) || + recovery_state.is_degraded())) { + queue_recovery(); + } +} void PG::do_replica_scrub_map(OpRequestRef op) { const MOSDRepScrubMap *m = static_cast(op->get_req()); diff --git a/src/osd/PG.h b/src/osd/PG.h index 1cf0b0d5b02..a795683755c 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -449,6 +449,9 @@ public: void cancel_remote_recovery_reservation() override; + void on_active_actmap() override; + void on_active_advmap(const OSDMapRef &osdmap) override; + bool is_forced_recovery_or_backfill() const { return recovery_state.is_forced_recovery_or_backfill(); } diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 35994967f95..a914b103208 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -2442,91 +2442,13 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap) psdout(10) << "Active advmap" << dendl; bool need_publish = false; - if (advmap.osdmap->require_osd_release >= CEPH_RELEASE_MIMIC) { - const auto& new_removed_snaps = advmap.osdmap->get_new_removed_snaps(); - auto i = new_removed_snaps.find(ps->info.pgid.pool()); - if (i != new_removed_snaps.end()) { - bool bad = false; - for (auto j : i->second) { - if (pg->snap_trimq.intersects(j.first, j.second)) { - decltype(pg->snap_trimq) added, overlap; - added.insert(j.first, j.second); - overlap.intersection_of(pg->snap_trimq, added); - if (ps->last_require_osd_release < CEPH_RELEASE_MIMIC) { - lderr(ps->cct) << __func__ << " removed_snaps already contains " - << overlap << ", but this is the first mimic+ osdmap," - << " so it's expected" << dendl; - } else { - lderr(ps->cct) << __func__ << " removed_snaps already contains " - << overlap << dendl; - bad = true; - } - pg->snap_trimq.union_of(added); - } else { - pg->snap_trimq.insert(j.first, j.second); - } - } - if (ps->last_require_osd_release < CEPH_RELEASE_MIMIC) { - // at upgrade, we report *all* previously removed snaps as removed in - // the first mimic epoch. remove the ones we previously divined were - // removed (and subsequently purged) from the trimq. - lderr(ps->cct) << __func__ << " first mimic map, filtering purged_snaps" - << " from new removed_snaps" << dendl; - pg->snap_trimq.subtract(ps->info.purged_snaps); - } - ldout(ps->cct,10) << __func__ << " new removed_snaps " << i->second - << ", snap_trimq now " << pg->snap_trimq << dendl; - ceph_assert(!bad || !ps->cct->_conf->osd_debug_verify_cached_snaps); - ps->dirty_info = true; - ps->dirty_big_info = true; - } - - const auto& new_purged_snaps = advmap.osdmap->get_new_purged_snaps(); - auto j = new_purged_snaps.find(ps->info.pgid.pool()); - if (j != new_purged_snaps.end()) { - bool bad = false; - for (auto k : j->second) { - if (!ps->info.purged_snaps.contains(k.first, k.second)) { - decltype(ps->info.purged_snaps) rm, overlap; - rm.insert(k.first, k.second); - overlap.intersection_of(ps->info.purged_snaps, rm); - lderr(ps->cct) << __func__ << " purged_snaps does not contain " - << rm << ", only " << overlap << dendl; - ps->info.purged_snaps.subtract(overlap); - // This can currently happen in the normal (if unlikely) course of - // events. Because adding snaps to purged_snaps does not increase - // the pg version or add a pg log entry, we don't reliably propagate - // purged_snaps additions to other OSDs. - // One example: - // - purge S - // - primary and replicas update purged_snaps - // - no object updates - // - pg mapping changes, new primary on different node - // - new primary pg version == eversion_t(), so info is not - // propagated. - //bad = true; - } else { - ps->info.purged_snaps.erase(k.first, k.second); - } - } - ldout(ps->cct,10) << __func__ << " new purged_snaps " << j->second - << ", now " << ps->info.purged_snaps << dendl; - ceph_assert(!bad || !ps->cct->_conf->osd_debug_verify_cached_snaps); - ps->dirty_info = true; - ps->dirty_big_info = true; - } - if (ps->dirty_big_info) { - // share updated purged_snaps to mgr/mon so that we (a) stop reporting - // purged snaps and (b) perhaps share more snaps that we have purged - // but didn't fit in pg_stat_t. - need_publish = true; - pg->share_pg_info(); - } - } else if (!ps->pool.newly_removed_snaps.empty()) { - pg->snap_trimq.union_of(ps->pool.newly_removed_snaps); - psdout(10) << *pg << " snap_trimq now " << pg->snap_trimq << dendl; - ps->dirty_info = true; - ps->dirty_big_info = true; + pl->on_active_advmap(advmap.osdmap); + if (ps->dirty_big_info) { + // share updated purged_snaps to mgr/mon so that we (a) stop reporting + // purged snaps and (b) perhaps share more snaps that we have purged + // but didn't fit in pg_stat_t. + need_publish = true; + pg->share_pg_info(); } for (size_t i = 0; i < ps->want_acting.size(); i++) { @@ -2539,9 +2461,9 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap) /* Check for changes in pool size (if the acting set changed as a result, * this does not matter) */ - if (advmap.lastmap->get_pg_size(context< PeeringMachine >().spgid.pgid) != - ps->get_osdmap()->get_pg_size(context< PeeringMachine >().spgid.pgid)) { - if (ps->get_osdmap()->get_pg_size(context< PeeringMachine >().spgid.pgid) <= + if (advmap.lastmap->get_pg_size(ps->info.pgid.pgid) != + ps->get_osdmap()->get_pg_size(ps->info.pgid.pgid)) { + if (ps->get_osdmap()->get_pg_size(ps->info.pgid.pgid) <= ps->actingset.size()) { ps->state_clear(PG_STATE_UNDERSIZED); } else { @@ -2570,14 +2492,13 @@ boost::statechart::result PeeringState::Active::react(const ActMap&) psdout(10) << "Active: handling ActMap" << dendl; ceph_assert(ps->is_primary()); + pl->on_active_actmap(); + if (pg->have_unfound()) { // object may have become unfound pg->discover_all_missing(*context< PeeringMachine >().get_query_map()); } - if (ps->cct->_conf->osd_check_for_log_corruption) - pg->check_log_for_corruption(pg->osd->store); - uint64_t unfound = ps->missing_loc.num_unfound(); if (unfound > 0 && pg->all_unfound_are_queried_or_lost(ps->get_osdmap())) { @@ -2591,17 +2512,6 @@ boost::statechart::result PeeringState::Active::react(const ActMap&) << unfound << " objects unfound and apparently lost"; } - if (ps->is_active()) { - psdout(10) << "Active: kicking snap trim" << dendl; - pg->kick_snap_trim(); - } - - if (ps->is_peered() && - !ps->is_clean() && - !ps->get_osdmap()->test_flag(CEPH_OSDMAP_NOBACKFILL) && - (!ps->get_osdmap()->test_flag(CEPH_OSDMAP_NOREBALANCE) || ps->is_degraded())) { - pg->queue_recovery(); - } return forward_event(); } diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 318ebe51481..f12214bb29a 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -119,6 +119,10 @@ public: virtual void on_activate() = 0; virtual void on_new_interval() = 0; + + // active map notifications + virtual void on_active_actmap() = 0; + virtual void on_active_advmap(const OSDMapRef &osdmap) = 0; virtual epoch_t oldest_stored_osdmap() = 0; virtual LogChannel &get_clog() = 0; @@ -1329,6 +1333,13 @@ public: last_persisted_osdmap = 0; } + template + void adjust_purged_snaps(Func f) { + f(info.purged_snaps); + dirty_info = true; + dirty_big_info = true; + } + bool is_deleting() const { return deleting; } @@ -1426,6 +1437,14 @@ public: return backfill_reserving; } + unsigned get_last_require_osd_release() const { + return last_require_osd_release; + } + + const pg_info_t &get_info() const { + return info; + } + // Flush control interface private: void start_flush(ObjectStore::Transaction *t) { -- 2.39.5