From 6e1b7c4c14be575a554ffe1d6e71c0d6189486af Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 3 Nov 2017 15:17:31 -0500 Subject: [PATCH] osd/PG: use new mimic osdmap structures for removed, pruned snaps - update snap_trimq and purged_snaps based on new mimic OSDMap fields - improve debug output to include both trimq and purged Signed-off-by: Sage Weil --- src/osd/PG.cc | 215 +++++++++++++++++++++++++++++++++++++------------- src/osd/PG.h | 5 +- 2 files changed, 164 insertions(+), 56 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index cfad8873086b1..5adcfcf4e27eb 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -231,61 +231,77 @@ void PGPool::update(CephContext *cct, OSDMapRef map) info = *pi; auid = pi->auid; name = map->get_pool_name(id); + bool updated = false; if ((map->get_epoch() != cached_epoch + 1) || (pi->get_snap_epoch() == map->get_epoch())) { updated = true; - if (pi->maybe_updated_removed_snaps(cached_removed_snaps)) { - pi->build_removed_snaps(newly_removed_snaps); - if (cached_removed_snaps.subset_of(newly_removed_snaps)) { + } + + if (map->require_osd_release >= CEPH_RELEASE_MIMIC) { + // mimic tracks removed_snaps_queue in the OSDmap and purged_snaps + // in the pg_info_t, with deltas for both in each OSDMap. we don't + // need to (and can't) track it here. 
+ cached_removed_snaps.clear(); + newly_removed_snaps.clear(); + } else { + // legacy (<= luminous) removed_snaps tracking + if (updated) { + if (pi->maybe_updated_removed_snaps(cached_removed_snaps)) { + pi->build_removed_snaps(newly_removed_snaps); + if (cached_removed_snaps.subset_of(newly_removed_snaps)) { interval_set removed_snaps = newly_removed_snaps; newly_removed_snaps.subtract(cached_removed_snaps); cached_removed_snaps.swap(removed_snaps); - } else { + } else { lgeneric_subdout(cct, osd, 0) << __func__ - << " cached_removed_snaps shrank from " << cached_removed_snaps - << " to " << newly_removed_snaps << dendl; + << " cached_removed_snaps shrank from " << cached_removed_snaps + << " to " << newly_removed_snaps << dendl; cached_removed_snaps.swap(newly_removed_snaps); newly_removed_snaps.clear(); + } + } else { + newly_removed_snaps.clear(); } - } else + } else { + /* 1) map->get_epoch() == cached_epoch + 1 && + * 2) pi->get_snap_epoch() != map->get_epoch() + * + * From the if branch, 1 && 2 must be true. From 2, we know that + * this map didn't change the set of removed snaps. From 1, we + * know that our cached_removed_snaps matches the previous map. + * Thus, from 1 && 2, cached_removed snaps matches the current + * set of removed snaps and all we have to do is clear + * newly_removed_snaps. + */ newly_removed_snaps.clear(); + } + lgeneric_subdout(cct, osd, 20) + << "PGPool::update cached_removed_snaps " + << cached_removed_snaps + << " newly_removed_snaps " + << newly_removed_snaps + << " snapc " << snapc + << (updated ? 
" (updated)":" (no change)") + << dendl; + if (cct->_conf->osd_debug_verify_cached_snaps) { + interval_set actual_removed_snaps; + pi->build_removed_snaps(actual_removed_snaps); + if (!(actual_removed_snaps == cached_removed_snaps)) { + lgeneric_derr(cct) << __func__ + << ": mismatch between the actual removed snaps " + << actual_removed_snaps + << " and pool.cached_removed_snaps " + << " pool.cached_removed_snaps " << cached_removed_snaps + << dendl; + } + assert(actual_removed_snaps == cached_removed_snaps); + } + } + if (info.is_pool_snaps_mode() && updated) { snapc = pi->get_snap_context(); - } else { - /* 1) map->get_epoch() == cached_epoch + 1 && - * 2) pi->get_snap_epoch() != map->get_epoch() - * - * From the if branch, 1 && 2 must be true. From 2, we know that - * this map didn't change the set of removed snaps. From 1, we - * know that our cached_removed_snaps matches the previous map. - * Thus, from 1 && 2, cached_removed snaps matches the current - * set of removed snaps and all we have to do is clear - * newly_removed_snaps. - */ - newly_removed_snaps.clear(); } cached_epoch = map->get_epoch(); - lgeneric_subdout(cct, osd, 20) - << "PGPool::update cached_removed_snaps " - << cached_removed_snaps - << " newly_removed_snaps " - << newly_removed_snaps - << " snapc " << snapc - << (updated ? 
" (updated)":" (no change)") - << dendl; - if (cct->_conf->osd_debug_verify_cached_snaps) { - interval_set actual_removed_snaps; - pi->build_removed_snaps(actual_removed_snaps); - if (!(actual_removed_snaps == cached_removed_snaps)) { - lgeneric_derr(cct) << __func__ - << ": mismatch between the actual removed snaps " - << actual_removed_snaps - << " and pool.cached_removed_snaps " - << " pool.cached_removed_snaps " << cached_removed_snaps - << dendl; - } - assert(actual_removed_snaps == cached_removed_snaps); - } } PG::PG(OSDService *o, OSDMapRef curmap, @@ -1614,20 +1630,36 @@ void PG::activate(ObjectStore::Transaction& t, get_osdmap()->get_epoch(), activation_epoch)); - // initialize snap_trimq if (is_primary()) { - dout(20) << "activate - purged_snaps " << info.purged_snaps - << " cached_removed_snaps " << pool.cached_removed_snaps << dendl; - snap_trimq = pool.cached_removed_snaps; - interval_set intersection; - intersection.intersection_of(snap_trimq, info.purged_snaps); - if (intersection == info.purged_snaps) { - snap_trimq.subtract(info.purged_snaps); + // initialize snap_trimq + if (get_osdmap()->require_osd_release < CEPH_RELEASE_MIMIC) { + dout(20) << "activate - purged_snaps " << info.purged_snaps + << " cached_removed_snaps " << pool.cached_removed_snaps + << dendl; + snap_trimq = pool.cached_removed_snaps; } else { - dout(0) << "warning: info.purged_snaps (" << info.purged_snaps - << ") is not a subset of removed_snaps" << dendl; - snap_trimq.subtract(intersection); - assert(!cct->_conf->osd_debug_verify_cached_snaps); + auto& removed_snaps_queue = get_osdmap()->get_removed_snaps_queue(); + auto p = removed_snaps_queue.find(info.pgid.pgid.pool()); + snap_trimq.clear(); + if (p != removed_snaps_queue.end()) { + dout(20) << "activate - purged_snaps " << info.purged_snaps + << " removed_snaps " << p->second + << dendl; + for (auto q : p->second) { + snap_trimq.insert(q.first, q.second); + } + } + } + interval_set purged; + 
purged.intersection_of(snap_trimq, info.purged_snaps); + snap_trimq.subtract(purged); + + if (get_osdmap()->require_osd_release >= CEPH_RELEASE_MIMIC) { + // adjust purged_snaps: PG may have been inactive while snaps were pruned + // from the removed_snaps_queue in the osdmap. update local purged_snaps to + // reflect only those snaps that we thought were pruned and were still in + // the queue. + info.purged_snaps.swap(purged); } } @@ -5621,8 +5653,23 @@ ostream& operator<<(ostream& out, const PG& pg) out << " u=" << unfound; } } - if (pg.snap_trimq.size()) - out << " snaptrimq=" << pg.snap_trimq; + if (!pg.snap_trimq.empty()) { + out << " trimq="; + // only show a count if the set is large + if (pg.snap_trimq.num_intervals() > 16) { + out << pg.snap_trimq.size(); + } else { + out << pg.snap_trimq; + } + } + if (!pg.info.purged_snaps.empty()) { + out << " ps="; // snap trim queue / purged snaps + if (pg.info.purged_snaps.num_intervals() > 16) { + out << pg.info.purged_snaps.size(); + } else { + out << pg.info.purged_snaps; + } + } out << "]"; @@ -5897,7 +5944,9 @@ void PG::handle_advance_map( << " -- " << up_primary << "/" << acting_primary << dendl; update_osdmap_ref(osdmap); + pool.update(cct, osdmap); + AdvMap evt( osdmap, lastmap, newup, up_primary, newacting, acting_primary); @@ -7257,7 +7306,63 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap) return forward_event(); } ldout(pg->cct, 10) << "Active advmap" << dendl; - if (!pg->pool.newly_removed_snaps.empty()) { + + if (advmap.osdmap->require_osd_release >= CEPH_RELEASE_MIMIC) { + const auto& new_removed_snaps = advmap.osdmap->get_new_removed_snaps(); + auto i = new_removed_snaps.find(pg->info.pgid.pool()); + if (i != new_removed_snaps.end()) { + bool bad = false; + for (auto j : i->second) { + if (pg->snap_trimq.intersects(j.first, j.second)) { + decltype(pg->snap_trimq) added, overlap; + added.insert(j.first, j.second); + overlap.intersection_of(pg->snap_trimq, added); + 
lderr(pg->cct) << __func__ << " removed_snaps already contains " + << overlap << dendl; + bad = true; + pg->snap_trimq.union_of(added); + } else { + pg->snap_trimq.insert(j.first, j.second); + } + } + ldout(pg->cct,10) << __func__ << " new removed_snaps " << i->second + << ", snap_trimq now " << pg->snap_trimq << dendl; + assert(!bad || !pg->cct->_conf->osd_debug_verify_cached_snaps); + pg->dirty_info = true; + pg->dirty_big_info = true; + } + + const auto& new_purged_snaps = advmap.osdmap->get_new_purged_snaps(); + auto j = new_purged_snaps.find(pg->info.pgid.pool()); + if (j != new_purged_snaps.end()) { + bool bad = false; + for (auto k : j->second) { + if (!pg->info.purged_snaps.contains(k.first, k.second)) { + decltype(pg->info.purged_snaps) rm, overlap; + rm.insert(k.first, k.second); + overlap.intersection_of(pg->info.purged_snaps, rm); + lderr(pg->cct) << __func__ << " purged_snaps does not contain " + << rm << ", only " << overlap << dendl; + pg->info.purged_snaps.subtract(overlap); + bad = true; + } else { + pg->info.purged_snaps.erase(k.first, k.second); + } + } + ldout(pg->cct,10) << __func__ << " new purged_snaps " << j->second + << ", now " << pg->info.purged_snaps << dendl; + assert(!bad || !pg->cct->_conf->osd_debug_verify_cached_snaps); + pg->dirty_info = true; + pg->dirty_big_info = true; + } + if (pg->dirty_big_info) { + // share updated purged_snaps to mgr/mon so that we (a) stop reporting + // purged snaps and (b) perhaps share more snaps that we have purged + // but didn't fit in pg_stat_t. 
+ pg->publish_stats_to_osd(); + pg->share_pg_info(); + } + } else if (!pg->pool.newly_removed_snaps.empty()) { pg->snap_trimq.union_of(pg->pool.newly_removed_snaps); ldout(pg->cct, 10) << *pg << " snap_trimq now " << pg->snap_trimq << dendl; pg->dirty_info = true; diff --git a/src/osd/PG.h b/src/osd/PG.h index 1e495da50b30f..73632fa9654b4 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -221,6 +221,7 @@ struct PGPool { pg_pool_t info; SnapContext snapc; // the default pool snapc, ready to go. + // these two sets are for < mimic only interval_set cached_removed_snaps; // current removed_snaps set interval_set newly_removed_snaps; // newly removed in the last epoch @@ -234,7 +235,9 @@ struct PGPool { assert(pi); info = *pi; snapc = pi->get_snap_context(); - pi->build_removed_snaps(cached_removed_snaps); + if (map->require_osd_release < CEPH_RELEASE_MIMIC) { + pi->build_removed_snaps(cached_removed_snaps); + } } void update(CephContext *cct, OSDMapRef map); -- 2.39.5