From afa7078763efc053814434d4b0dae08dd2295349 Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 22 Sep 2016 17:09:22 +0100 Subject: [PATCH] mon: refactor PGMap updating code for reuse in mgr ...and remove the copypasta from mgr. mgr still doesn't do all the same logic (it doesn't have osdmap history handy) but it is now much easier to see which bits are used one place vs. the other. Signed-off-by: John Spray --- src/mgr/ClusterState.cc | 236 +------------------------------- src/mon/PGMap.cc | 285 +++++++++++++++++++++++++++++++++++++++ src/mon/PGMap.h | 39 ++++++ src/mon/PGMonitor.cc | 288 +--------------------------------------- src/mon/PGMonitor.h | 11 -- src/osd/OSDMap.h | 3 +- 6 files changed, 336 insertions(+), 526 deletions(-) diff --git a/src/mgr/ClusterState.cc b/src/mgr/ClusterState.cc index e5d29a707b30e..027b461efadcf 100644 --- a/src/mgr/ClusterState.cc +++ b/src/mgr/ClusterState.cc @@ -99,239 +99,13 @@ void ClusterState::notify_osdmap(const OSDMap &osd_map) PGMap::Incremental pending_inc; pending_inc.version = pg_map.version + 1; // to make apply_incremental happy - _update_creating_pgs(osd_map, &pending_inc); - _register_new_pgs(osd_map, &pending_inc); + PGMapUpdater::update_creating_pgs(osd_map, &pg_map, &pending_inc); + PGMapUpdater::register_new_pgs(osd_map, &pg_map, &pending_inc); pg_map.apply_incremental(g_ceph_context, pending_inc); - // TODO: Reinstate check_down_pgs logic? -} - -void ClusterState::_register_new_pgs( - const OSDMap &osd_map, - PGMap::Incremental *pending_inc) -{ - // iterate over crush mapspace - epoch_t epoch = osd_map.get_epoch(); - dout(10) << "checking pg pools for osdmap epoch " << epoch - << ", last_pg_scan " << pg_map.last_pg_scan << dendl; - - int created = 0; - for (const auto & p : osd_map.pools) { - int64_t poolid = p.first; - const pg_pool_t &pool = p.second; - - int ruleno = osd_map.crush->find_rule(pool.get_crush_ruleset(), - pool.get_type(), pool.get_size()); - if (ruleno < 0 || !osd_map.crush->rule_exists(ruleno)) - continue; - - if (pool.get_last_change() <= pg_map.last_pg_scan || - pool.get_last_change() <= pending_inc->pg_scan) { - dout(10) << " no change in pool " << poolid << " " << pool << dendl; - continue; - } - - dout(10) << "scanning pool " << poolid - << " " << pool << dendl; - - // first pgs in this pool - bool new_pool = pg_map.pg_pool_sum.count(poolid) == 0; - - for (ps_t ps = 0; ps < pool.get_pg_num(); ps++) { - pg_t pgid(ps, poolid, -1); - if (pg_map.pg_stat.count(pgid)) { - dout(20) << "register_new_pgs have " << pgid << dendl; - continue; - } - created++; - _register_pg(osd_map, pgid, pool.get_last_change(), new_pool, - pending_inc); - } - } - - int removed = 0; - for (const auto &p : pg_map.creating_pgs) { - if (p.preferred() >= 0) { - dout(20) << " removing creating_pg " << p - << " because it is localized and obsolete" << dendl; - pending_inc->pg_remove.insert(p); - removed++; - } - if (!osd_map.have_pg_pool(p.pool())) { - dout(20) << " removing creating_pg " << p - << " because containing pool deleted" << dendl; - pending_inc->pg_remove.insert(p); - ++removed; - } - } - - // deleted pools? - for (const auto & p : pg_map.pg_stat) { - if (!osd_map.have_pg_pool(p.first.pool())) { - dout(20) << " removing pg_stat " << p.first << " because " - << "containing pool deleted" << dendl; - pending_inc->pg_remove.insert(p.first); - ++removed; - } - if (p.first.preferred() >= 0) { - dout(20) << " removing localized pg " << p.first << dendl; - pending_inc->pg_remove.insert(p.first); - ++removed; - } - } - - // we don't want to redo this work if we can avoid it. - pending_inc->pg_scan = epoch; - - dout(10) << "register_new_pgs registered " << created << " new pgs, removed " - << removed << " uncreated pgs" << dendl; -} - -void ClusterState::_register_pg( - const OSDMap &osd_map, - pg_t pgid, epoch_t epoch, - bool new_pool, - PGMap::Incremental *pending_inc) -{ - pg_t parent; - int split_bits = 0; - bool parent_found = false; - if (!new_pool) { - parent = pgid; - while (1) { - // remove most significant bit - int msb = cbits(parent.ps()); - if (!msb) - break; - parent.set_ps(parent.ps() & ~(1<<(msb-1))); - split_bits++; - dout(30) << " is " << pgid << " parent " << parent << " ?" << dendl; - if (pg_map.pg_stat.count(parent) && - pg_map.pg_stat[parent].state != PG_STATE_CREATING) { - dout(10) << " parent is " << parent << dendl; - parent_found = true; - break; - } - } - } - - pg_stat_t &stats = pending_inc->pg_stat_updates[pgid]; - stats.state = PG_STATE_CREATING; - stats.created = epoch; - stats.parent = parent; - stats.parent_split_bits = split_bits; - stats.mapping_epoch = epoch; - - if (parent_found) { - pg_stat_t &ps = pg_map.pg_stat[parent]; - stats.last_fresh = ps.last_fresh; - stats.last_active = ps.last_active; - stats.last_change = ps.last_change; - stats.last_peered = ps.last_peered; - stats.last_clean = ps.last_clean; - stats.last_unstale = ps.last_unstale; - stats.last_undegraded = ps.last_undegraded; - stats.last_fullsized = ps.last_fullsized; - stats.last_scrub_stamp = ps.last_scrub_stamp; - stats.last_deep_scrub_stamp = ps.last_deep_scrub_stamp; - stats.last_clean_scrub_stamp = ps.last_clean_scrub_stamp; - } else { - utime_t now = ceph_clock_now(g_ceph_context); - stats.last_fresh = now; - stats.last_active = now; - stats.last_change = now; - stats.last_peered = now; - stats.last_clean = now; - stats.last_unstale = now; - stats.last_undegraded = now; - stats.last_fullsized = now; - stats.last_scrub_stamp = now; - stats.last_deep_scrub_stamp = now; - stats.last_clean_scrub_stamp = now; - } - - osd_map.pg_to_up_acting_osds( - pgid, - &stats.up, - &stats.up_primary, - &stats.acting, - &stats.acting_primary); - - if (split_bits == 0) { - dout(10) << " will create " << pgid - << " primary " << stats.acting_primary - << " acting " << stats.acting - << dendl; - } else { - dout(10) << " will create " << pgid - << " primary " << stats.acting_primary - << " acting " << stats.acting - << " parent " << parent - << " by " << split_bits << " bits" - << dendl; - } -} - -// This was PGMonitor::map_pg_creates -void ClusterState::_update_creating_pgs( - const OSDMap &osd_map, - PGMap::Incremental *pending_inc) -{ - assert(pending_inc != nullptr); - - dout(10) << "to " << pg_map.creating_pgs.size() - << " pgs, osdmap epoch " << osd_map.get_epoch() - << dendl; - - for (set::const_iterator p = pg_map.creating_pgs.begin(); - p != pg_map.creating_pgs.end(); - ++p) { - pg_t pgid = *p; - pg_t on = pgid; - ceph::unordered_map::const_iterator q = - pg_map.pg_stat.find(pgid); - assert(q != pg_map.pg_stat.end()); - const pg_stat_t *s = &q->second; - - if (s->parent_split_bits) - on = s->parent; - - vector up, acting; - int up_primary, acting_primary; - osd_map.pg_to_up_acting_osds( - on, - &up, - &up_primary, - &acting, - &acting_primary); - - if (up != s->up || - up_primary != s->up_primary || - acting != s->acting || - acting_primary != s->acting_primary) { - pg_stat_t *ns = &pending_inc->pg_stat_updates[pgid]; - dout(20) << pgid << " " - << " acting_primary: " << s->acting_primary - << " -> " << acting_primary - << " acting: " << s->acting << " -> " << acting - << " up_primary: " << s->up_primary << " -> " << up_primary - << " up: " << s->up << " -> " << up - << dendl; - - // only initialize if it wasn't already a pending update - if (ns->reported_epoch == 0) - *ns = *s; - - // note epoch if the target of the create message changed - if (acting_primary != ns->acting_primary) - ns->mapping_epoch = osd_map.get_epoch(); - - ns->up = up; - ns->up_primary = up_primary; - ns->acting = acting; - ns->acting_primary = acting_primary; - } - } + // TODO: Complete the separation of PG state handling so + // that a cut-down set of functionality remains in PGMonitor + // while the full-blown PGMap lives only here. } diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 96e2ada531192..82fedf736d3b2 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -2070,3 +2070,288 @@ void PGMap::dump_object_stat_sum(TextTable &tbl, Formatter *f, } +void PGMapUpdater::check_osd_map(const OSDMap::Incremental &osd_inc, + std::set *need_check_down_pg_osds, + std::map *last_osd_report, + PGMap *pg_map, + PGMap::Incremental *pending_inc) +{ + for (const auto &p : osd_inc.new_weight) { + if (p.second == CEPH_OSD_OUT) { + dout(10) << __func__ << " osd." << p.first << " went OUT" << dendl; + pending_inc->stat_osd_out(p.first); + } + } + + // this is conservative: we want to know if any osds (maybe) got marked down. + for (const auto &p : osd_inc.new_state) { + if (p.second & CEPH_OSD_UP) { // true if marked up OR down, + // but we're too lazy to check + // which + need_check_down_pg_osds->insert(p.first); + + // clear out the last_osd_report for this OSD + map::iterator report = last_osd_report->find(p.first); + if (report != last_osd_report->end()) { + last_osd_report->erase(report); + } + + // clear out osd_stat slow request histogram + dout(20) << __func__ << " clearing osd." << p.first + << " request histogram" << dendl; + pending_inc->stat_osd_down_up(p.first, *pg_map); + } + + if (p.second & CEPH_OSD_EXISTS) { + // whether it was created *or* destroyed, we can safely drop + // it's osd_stat_t record. + dout(10) << __func__ << " osd." << p.first + << " created or destroyed" << dendl; + pending_inc->rm_stat(p.first); + + // and adjust full, nearfull set + pg_map->nearfull_osds.erase(p.first); + pg_map->full_osds.erase(p.first); + } + } +} + +void PGMapUpdater::register_pg( + const OSDMap &osd_map, + pg_t pgid, epoch_t epoch, + bool new_pool, + PGMap *pg_map, + PGMap::Incremental *pending_inc) +{ + pg_t parent; + int split_bits = 0; + bool parent_found = false; + if (!new_pool) { + parent = pgid; + while (1) { + // remove most significant bit + int msb = cbits(parent.ps()); + if (!msb) + break; + parent.set_ps(parent.ps() & ~(1<<(msb-1))); + split_bits++; + dout(30) << " is " << pgid << " parent " << parent << " ?" << dendl; + if (pg_map->pg_stat.count(parent) && + pg_map->pg_stat[parent].state != PG_STATE_CREATING) { + dout(10) << " parent is " << parent << dendl; + parent_found = true; + break; + } + } + } + + pg_stat_t &stats = pending_inc->pg_stat_updates[pgid]; + stats.state = PG_STATE_CREATING; + stats.created = epoch; + stats.parent = parent; + stats.parent_split_bits = split_bits; + stats.mapping_epoch = epoch; + + if (parent_found) { + pg_stat_t &ps = pg_map->pg_stat[parent]; + stats.last_fresh = ps.last_fresh; + stats.last_active = ps.last_active; + stats.last_change = ps.last_change; + stats.last_peered = ps.last_peered; + stats.last_clean = ps.last_clean; + stats.last_unstale = ps.last_unstale; + stats.last_undegraded = ps.last_undegraded; + stats.last_fullsized = ps.last_fullsized; + stats.last_scrub_stamp = ps.last_scrub_stamp; + stats.last_deep_scrub_stamp = ps.last_deep_scrub_stamp; + stats.last_clean_scrub_stamp = ps.last_clean_scrub_stamp; + } else { + utime_t now = ceph_clock_now(g_ceph_context); + stats.last_fresh = now; + stats.last_active = now; + stats.last_change = now; + stats.last_peered = now; + stats.last_clean = now; + stats.last_unstale = now; + stats.last_undegraded = now; + stats.last_fullsized = now; + stats.last_scrub_stamp = now; + stats.last_deep_scrub_stamp = now; + stats.last_clean_scrub_stamp = now; + } + + osd_map.pg_to_up_acting_osds( + pgid, + &stats.up, + &stats.up_primary, + &stats.acting, + &stats.acting_primary); + + if (split_bits == 0) { + dout(10) << __func__ << " will create " << pgid + << " primary " << stats.acting_primary + << " acting " << stats.acting + << dendl; + } else { + dout(10) << __func__ << " will create " << pgid + << " primary " << stats.acting_primary + << " acting " << stats.acting + << " parent " << parent + << " by " << split_bits << " bits" + << dendl; + } +} + +void PGMapUpdater::register_new_pgs( + const OSDMap &osd_map, + PGMap *pg_map, + PGMap::Incremental *pending_inc) +{ + epoch_t epoch = osd_map.get_epoch(); + dout(10) << __func__ << " checking pg pools for osdmap epoch " << epoch + << ", last_pg_scan " << pg_map->last_pg_scan << dendl; + + int created = 0; + for (const auto &p : osd_map.pools) { + int64_t poolid = p.first; + const pg_pool_t &pool = p.second; + int ruleno = osd_map.crush->find_rule(pool.get_crush_ruleset(), + pool.get_type(), pool.get_size()); + if (ruleno < 0 || !osd_map.crush->rule_exists(ruleno)) + continue; + + if (pool.get_last_change() <= pg_map->last_pg_scan || + pool.get_last_change() <= pending_inc->pg_scan) { + dout(10) << " no change in pool " << poolid << " " << pool << dendl; + continue; + } + + dout(10) << __func__ << " scanning pool " << poolid + << " " << pool << dendl; + + // first pgs in this pool + bool new_pool = pg_map->pg_pool_sum.count(poolid) == 0; + + for (ps_t ps = 0; ps < pool.get_pg_num(); ps++) { + pg_t pgid(ps, poolid, -1); + if (pg_map->pg_stat.count(pgid)) { + dout(20) << "register_new_pgs have " << pgid << dendl; + continue; + } + created++; + register_pg(osd_map, pgid, pool.get_last_change(), new_pool, + pg_map, pending_inc); + } + } + + int removed = 0; + for (const auto &p : pg_map->creating_pgs) { + if (p.preferred() >= 0) { + dout(20) << " removing creating_pg " << p + << " because it is localized and obsolete" << dendl; + pending_inc->pg_remove.insert(p); + removed++; + } + if (!osd_map.have_pg_pool(p.pool())) { + dout(20) << " removing creating_pg " << p + << " because containing pool deleted" << dendl; + pending_inc->pg_remove.insert(p); + ++removed; + } + } + + // deleted pools? + for (const auto &p : pg_map->pg_stat) { + if (!osd_map.have_pg_pool(p.first.pool())) { + dout(20) << " removing pg_stat " << p.first << " because " + << "containing pool deleted" << dendl; + pending_inc->pg_remove.insert(p.first); + ++removed; + } + if (p.first.preferred() >= 0) { + dout(20) << " removing localized pg " << p.first << dendl; + pending_inc->pg_remove.insert(p.first); + ++removed; + } + } + + // we don't want to redo this work if we can avoid it. + pending_inc->pg_scan = epoch; + + dout(10) << "register_new_pgs registered " << created << " new pgs, removed " + << removed << " uncreated pgs" << dendl; +} + + +void PGMapUpdater::update_creating_pgs( + const OSDMap &osd_map, + PGMap *pg_map, + PGMap::Incremental *pending_inc) +{ + dout(10) << __func__ << " to " << pg_map->creating_pgs.size() + << " pgs, osdmap epoch " << osd_map.get_epoch() + << dendl; + + unsigned changed = 0; + for (set::const_iterator p = pg_map->creating_pgs.begin(); + p != pg_map->creating_pgs.end(); + ++p) { + pg_t pgid = *p; + pg_t on = pgid; + ceph::unordered_map::const_iterator q = + pg_map->pg_stat.find(pgid); + assert(q != pg_map->pg_stat.end()); + const pg_stat_t *s = &q->second; + + if (s->parent_split_bits) + on = s->parent; + + vector up, acting; + int up_primary, acting_primary; + osd_map.pg_to_up_acting_osds( + on, + &up, + &up_primary, + &acting, + &acting_primary); + + if (up != s->up || + up_primary != s->up_primary || + acting != s->acting || + acting_primary != s->acting_primary) { + pg_stat_t *ns = &pending_inc->pg_stat_updates[pgid]; + if (osd_map.get_epoch() > ns->reported_epoch) { + dout(20) << __func__ << " " << pgid << " " + << " acting_primary: " << s->acting_primary + << " -> " << acting_primary + << " acting: " << s->acting << " -> " << acting + << " up_primary: " << s->up_primary << " -> " << up_primary + << " up: " << s->up << " -> " << up + << dendl; + + // only initialize if it wasn't already a pending update + if (ns->reported_epoch == 0) + *ns = *s; + + // note epoch if the target of the create message changed + if (acting_primary != ns->acting_primary) + ns->mapping_epoch = osd_map.get_epoch(); + + ns->up = up; + ns->up_primary = up_primary; + ns->acting = acting; + ns->acting_primary = acting_primary; + + ++changed; + } else { + dout(20) << __func__ << " " << pgid << " has pending update from newer" + << " epoch " << ns->reported_epoch + << dendl; + } + } + } + if (changed) { + dout(10) << __func__ << " " << changed << " pgs changed primary" << dendl; + } +} + diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index 9fc98fbcd2cd5..7787d6cbb02e3 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -26,6 +26,10 @@ #include "osd/osd_types.h" #include +// FIXME: don't like including this here to get OSDMap::Incremental, maybe +// PGMapUpdater needs its own header. +#include "osd/OSDMap.h" + namespace ceph { class Formatter; } class PGMap { @@ -382,4 +386,39 @@ inline ostream& operator<<(ostream& out, const PGMap& m) { return out; } +class PGMapUpdater +{ +public: + static void check_osd_map( + const OSDMap::Incremental &osd_inc, + std::set *need_check_down_pg_osds, + std::map *last_osd_report, + PGMap *pg_map, + PGMap::Incremental *pending_inc); + + /** + * check latest osdmap for new pgs to register + */ + static void register_new_pgs( + const OSDMap &osd_map, + PGMap *pg_map, + PGMap::Incremental *pending_inc); + + /** + * recalculate creating pg mappings + */ + static void update_creating_pgs( + const OSDMap &osd_map, + PGMap *pg_map, + PGMap::Incremental *pending_inc); + +protected: + static void register_pg( + const OSDMap &osd_map, + pg_t pgid, epoch_t epoch, + bool new_pool, + PGMap *pg_map, + PGMap::Incremental *pending_inc); +}; + #endif diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index ad05ef2676a4f..938c3bcc2a0d3 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -67,7 +67,7 @@ void PGMonitor::on_restart() void PGMonitor::on_active() { if (mon->is_leader()) { - check_osd_map(mon->osdmon()->osdmap.epoch); + check_osd_map(mon->osdmon()->osdmap.get_epoch()); need_check_down_pgs = true; } @@ -911,54 +911,16 @@ void PGMonitor::check_osd_map(epoch_t epoch) assert(bl.length()); OSDMap::Incremental inc(bl); - for (map::iterator p = inc.new_weight.begin(); - p != inc.new_weight.end(); - ++p) - if (p->second == CEPH_OSD_OUT) { - dout(10) << __func__ << " osd." << p->first << " went OUT" << dendl; - pending_inc.stat_osd_out(p->first); - } - - - // this is conservative: we want to know if any osds (maybe) got marked down. - for (map::iterator p = inc.new_state.begin(); - p != inc.new_state.end(); - ++p) { - if (p->second & CEPH_OSD_UP) { // true if marked up OR down, - // but we're too lazy to check - // which - need_check_down_pg_osds.insert(p->first); - - // clear out the last_osd_report for this OSD - map::iterator report = last_osd_report.find(p->first); - if (report != last_osd_report.end()) { - last_osd_report.erase(report); - } - - // clear out osd_stat slow request histogram - dout(20) << __func__ << " clearing osd." << p->first - << " request histogram" << dendl; - pending_inc.stat_osd_down_up(p->first, pg_map); - } - - if (p->second & CEPH_OSD_EXISTS) { - // whether it was created *or* destroyed, we can safely drop - // it's osd_stat_t record. - dout(10) << __func__ << " osd." << p->first - << " created or destroyed" << dendl; - pending_inc.rm_stat(p->first); - // and adjust full, nearfull set - pg_map.nearfull_osds.erase(p->first); - pg_map.full_osds.erase(p->first); - } - } + PGMapUpdater::check_osd_map(inc, &need_check_down_pg_osds, + &last_osd_report, &pg_map, &pending_inc); } assert(pg_map.last_osdmap_epoch < epoch); pending_inc.osdmap_epoch = epoch; - map_pg_creates(); - register_new_pgs(); + PGMapUpdater::update_creating_pgs(mon->osdmon()->osdmap, + &pg_map, &pending_inc); + PGMapUpdater::register_new_pgs(mon->osdmon()->osdmap, &pg_map, &pending_inc); if (need_check_down_pgs || !need_check_down_pg_osds.empty()) check_down_pgs(); @@ -966,244 +928,6 @@ void PGMonitor::check_osd_map(epoch_t epoch) propose_pending(); } -void PGMonitor::register_pg(OSDMap *osdmap, - pg_pool_t& pool, pg_t pgid, epoch_t epoch, - bool new_pool) -{ - pg_t parent; - int split_bits = 0; - bool parent_found = false; - if (!new_pool) { - parent = pgid; - while (1) { - // remove most significant bit - int msb = cbits(parent.ps()); - if (!msb) - break; - parent.set_ps(parent.ps() & ~(1<<(msb-1))); - split_bits++; - dout(30) << " is " << pgid << " parent " << parent << " ?" << dendl; - if (pg_map.pg_stat.count(parent) && - pg_map.pg_stat[parent].state != PG_STATE_CREATING) { - dout(10) << " parent is " << parent << dendl; - parent_found = true; - break; - } - } - } - - pg_stat_t &stats = pending_inc.pg_stat_updates[pgid]; - stats.state = PG_STATE_CREATING; - stats.created = epoch; - stats.parent = parent; - stats.parent_split_bits = split_bits; - stats.mapping_epoch = epoch; - - if (parent_found) { - pg_stat_t &ps = pg_map.pg_stat[parent]; - stats.last_fresh = ps.last_fresh; - stats.last_active = ps.last_active; - stats.last_change = ps.last_change; - stats.last_peered = ps.last_peered; - stats.last_clean = ps.last_clean; - stats.last_unstale = ps.last_unstale; - stats.last_undegraded = ps.last_undegraded; - stats.last_fullsized = ps.last_fullsized; - stats.last_scrub_stamp = ps.last_scrub_stamp; - stats.last_deep_scrub_stamp = ps.last_deep_scrub_stamp; - stats.last_clean_scrub_stamp = ps.last_clean_scrub_stamp; - } else { - utime_t now = ceph_clock_now(g_ceph_context); - stats.last_fresh = now; - stats.last_active = now; - stats.last_change = now; - stats.last_peered = now; - stats.last_clean = now; - stats.last_unstale = now; - stats.last_undegraded = now; - stats.last_fullsized = now; - stats.last_scrub_stamp = now; - stats.last_deep_scrub_stamp = now; - stats.last_clean_scrub_stamp = now; - } - - osdmap->pg_to_up_acting_osds( - pgid, - &stats.up, - &stats.up_primary, - &stats.acting, - &stats.acting_primary); - - if (split_bits == 0) { - dout(10) << __func__ << " will create " << pgid - << " primary " << stats.acting_primary - << " acting " << stats.acting - << dendl; - } else { - dout(10) << __func__ << " will create " << pgid - << " primary " << stats.acting_primary - << " acting " << stats.acting - << " parent " << parent - << " by " << split_bits << " bits" - << dendl; - } -} - -void PGMonitor::register_new_pgs() -{ - // iterate over crush mapspace - OSDMap *osdmap = &mon->osdmon()->osdmap; - epoch_t epoch = osdmap->get_epoch(); - dout(10) << __func__ << " checking pg pools for osdmap epoch " << epoch - << ", last_pg_scan " << pg_map.last_pg_scan << dendl; - - int created = 0; - for (map::iterator p = osdmap->pools.begin(); - p != osdmap->pools.end(); - ++p) { - int64_t poolid = p->first; - pg_pool_t &pool = p->second; - int ruleno = osdmap->crush->find_rule(pool.get_crush_ruleset(), - pool.get_type(), pool.get_size()); - if (ruleno < 0 || !osdmap->crush->rule_exists(ruleno)) - continue; - - if (pool.get_last_change() <= pg_map.last_pg_scan || - pool.get_last_change() <= pending_inc.pg_scan) { - dout(10) << " no change in pool " << p->first << " " << pool << dendl; - continue; - } - - dout(10) << __func__ << " scanning pool " << p->first - << " " << pool << dendl; - - // first pgs in this pool - bool new_pool = pg_map.pg_pool_sum.count(poolid) == 0; - - for (ps_t ps = 0; ps < pool.get_pg_num(); ps++) { - pg_t pgid(ps, poolid, -1); - if (pg_map.pg_stat.count(pgid)) { - dout(20) << "register_new_pgs have " << pgid << dendl; - continue; - } - created++; - register_pg(osdmap, pool, pgid, pool.get_last_change(), new_pool); - } - } - - int removed = 0; - for (set::iterator p = pg_map.creating_pgs.begin(); - p != pg_map.creating_pgs.end(); - ++p) { - if (p->preferred() >= 0) { - dout(20) << " removing creating_pg " << *p - << " because it is localized and obsolete" << dendl; - pending_inc.pg_remove.insert(*p); - removed++; - } - if (!osdmap->have_pg_pool(p->pool())) { - dout(20) << " removing creating_pg " << *p - << " because containing pool deleted" << dendl; - pending_inc.pg_remove.insert(*p); - ++removed; - } - } - - // deleted pools? - for (ceph::unordered_map::const_iterator p = - pg_map.pg_stat.begin(); - p != pg_map.pg_stat.end(); ++p) { - if (!osdmap->have_pg_pool(p->first.pool())) { - dout(20) << " removing pg_stat " << p->first << " because " - << "containing pool deleted" << dendl; - pending_inc.pg_remove.insert(p->first); - ++removed; - } - if (p->first.preferred() >= 0) { - dout(20) << " removing localized pg " << p->first << dendl; - pending_inc.pg_remove.insert(p->first); - ++removed; - } - } - - // we don't want to redo this work if we can avoid it. - pending_inc.pg_scan = epoch; - - dout(10) << "register_new_pgs registered " << created << " new pgs, removed " - << removed << " uncreated pgs" << dendl; -} - -void PGMonitor::map_pg_creates() -{ - OSDMap *osdmap = &mon->osdmon()->osdmap; - - dout(10) << __func__ << " to " << pg_map.creating_pgs.size() - << " pgs, osdmap epoch " << osdmap->get_epoch() - << dendl; - - unsigned changed = 0; - for (set::const_iterator p = pg_map.creating_pgs.begin(); - p != pg_map.creating_pgs.end(); - ++p) { - pg_t pgid = *p; - pg_t on = pgid; - ceph::unordered_map::const_iterator q = - pg_map.pg_stat.find(pgid); - assert(q != pg_map.pg_stat.end()); - const pg_stat_t *s = &q->second; - - if (s->parent_split_bits) - on = s->parent; - - vector up, acting; - int up_primary, acting_primary; - osdmap->pg_to_up_acting_osds( - on, - &up, - &up_primary, - &acting, - &acting_primary); - - if (up != s->up || - up_primary != s->up_primary || - acting != s->acting || - acting_primary != s->acting_primary) { - pg_stat_t *ns = &pending_inc.pg_stat_updates[pgid]; - if (osdmap->get_epoch() > ns->reported_epoch) { - dout(20) << __func__ << " " << pgid << " " - << " acting_primary: " << s->acting_primary - << " -> " << acting_primary - << " acting: " << s->acting << " -> " << acting - << " up_primary: " << s->up_primary << " -> " << up_primary - << " up: " << s->up << " -> " << up - << dendl; - - // only initialize if it wasn't already a pending update - if (ns->reported_epoch == 0) - *ns = *s; - - // note epoch if the target of the create message changed - if (acting_primary != ns->acting_primary) - ns->mapping_epoch = osdmap->get_epoch(); - - ns->up = up; - ns->up_primary = up_primary; - ns->acting = acting; - ns->acting_primary = acting_primary; - - ++changed; - } else { - dout(20) << __func__ << " " << pgid << " has pending update from newer" - << " epoch " << ns->reported_epoch - << dendl; - } - } - } - if (changed) { - dout(10) << __func__ << " " << changed << " pgs changed primary" << dendl; - } -} - void PGMonitor::send_pg_creates() { // We only need to do this old, spammy way of broadcasting create messages diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h index 9c92d231bb07d..163551d2975c9 100644 --- a/src/mon/PGMonitor.h +++ b/src/mon/PGMonitor.h @@ -91,19 +91,8 @@ private: // when we last received PG stats from each osd map last_osd_report; - void register_pg(OSDMap *osdmap, pg_pool_t& pool, pg_t pgid, - epoch_t epoch, bool new_pool); - - /** - * check latest osdmap for new pgs to register - */ void register_new_pgs(); - /** - * recalculate creating pg mappings - */ - void map_pg_creates(); - void send_pg_creates(); epoch_t send_pg_creates(int osd, Connection *con, epoch_t next); diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index dfeec1bee0f70..ce5a0384f946a 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -256,8 +256,7 @@ private: friend class OSDMonitor; // FIXME: the elements required for PGMap updates should be exposed properly - friend class PGMonitor; - friend class ClusterState; + friend class PGMapUpdater; public: OSDMap() : epoch(0), -- 2.39.5