From: Sage Weil Date: Mon, 30 Jan 2017 20:24:47 +0000 (-0500) Subject: mon/OSDMonitor: prime_pg_temp based on OSDMapMapping X-Git-Tag: v12.0.1~343^2~14 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cc12b5fc0a765fc09a693f533110ddbfced12454;p=ceph.git mon/OSDMonitor: prime_pg_temp based on OSDMapMapping Simplify the code and remove most of the dependency on PGMap. We still need PGMap for the creating_pgs to ignore; that can be dropped later. Signed-off-by: Sage Weil --- diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index a83f566cd76f..2f748e7ab1c0 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -986,99 +986,97 @@ void OSDMonitor::maybe_prime_pg_temp() next.deepish_copy_from(osdmap); next.apply_incremental(pending_inc); - PGMap *pg_map = &mon->pgmon()->pg_map; + PGMap *pg_map = &mon->pgmon()->pg_map; // FIMXE: use new creating_pgs map utime_t stop = ceph_clock_now(); stop += g_conf->mon_osd_prime_pg_temp_max_time; int chunk = 1000; int n = chunk; + bool abort = false; if (all) { - for (ceph::unordered_map::iterator pp = - pg_map->pg_stat.begin(); - pp != pg_map->pg_stat.end(); - ++pp) { - prime_pg_temp(next, pp); - if (--n <= 0) { - n = chunk; - if (ceph_clock_now() > stop) { - dout(10) << __func__ << " consumed more than " - << g_conf->mon_osd_prime_pg_temp_max_time - << " seconds, stopping" - << dendl; - break; + for (auto& pi : osdmap.get_pools()) { + for (unsigned ps = 0; ps < pi.second.get_pg_num(); ++ps) { + pg_t pgid(ps, pi.first); + if (!pg_map->creating_pgs.count(pgid)) { + prime_pg_temp(next, pgid); } + if (--n <= 0) { + n = chunk; + if (ceph_clock_now() > stop) { + dout(10) << __func__ << " consumed more than " + << g_conf->mon_osd_prime_pg_temp_max_time + << " seconds, stopping" + << dendl; + abort = true; + break; + } + } + } + if (abort) { + break; } } } else { dout(10) << __func__ << " " << osds.size() << " interesting osds" << dendl; - for (set::iterator p = osds.begin(); p != osds.end(); ++p) { - n -= prime_pg_temp(next, pg_map, *p); - if (n <= 0) { - n = chunk; - if (ceph_clock_now() > stop) { - dout(10) << __func__ << " consumed more than " - << g_conf->mon_osd_prime_pg_temp_max_time - << " seconds, stopping" - << dendl; - break; + for (auto osd : osds) { + const vector& pgs = mapping.get_osd_acting_pgs(osd); + dout(20) << __func__ << " osd." << osd << " " << pgs << dendl; + for (auto pgid : pgs) { + if (!pg_map->creating_pgs.count(pgid)) { + prime_pg_temp(next, pgid); + } + if (--n <= 0) { + n = chunk; + if (ceph_clock_now() > stop) { + dout(10) << __func__ << " consumed more than " + << g_conf->mon_osd_prime_pg_temp_max_time + << " seconds, stopping" + << dendl; + abort = true; + break; + } } } + if (abort) { + break; + } } } } -void OSDMonitor::prime_pg_temp(OSDMap& next, - ceph::unordered_map::iterator pp) +void OSDMonitor::prime_pg_temp( + OSDMap& next, + pg_t pgid) { - // do not prime creating pgs - if (pp->second.state & PG_STATE_CREATING) - return; // do not touch a mapping if a change is pending - if (pending_inc.new_pg_temp.count(pp->first)) + if (pending_inc.new_pg_temp.count(pgid)) return; + vector old_acting; + mapping.get(pgid, nullptr, nullptr, &old_acting, nullptr); vector up, acting; int up_primary, acting_primary; - next.pg_to_up_acting_osds(pp->first, &up, &up_primary, &acting, &acting_primary); - if (acting == pp->second.acting) + next.pg_to_up_acting_osds(pgid, &up, &up_primary, &acting, &acting_primary); + if (acting == old_acting) return; // no change since last pg update, skip vector cur_up, cur_acting; - osdmap.pg_to_up_acting_osds(pp->first, &cur_up, &up_primary, + osdmap.pg_to_up_acting_osds(pgid, &cur_up, &up_primary, &cur_acting, &acting_primary); if (cur_acting == acting) return; // no change this epoch; must be stale pg_stat if (cur_acting.empty()) return; // if previously empty now we can be no worse off - const pg_pool_t *pool = next.get_pg_pool(pp->first.pool()); + const pg_pool_t *pool = next.get_pg_pool(pgid.pool()); if (pool && cur_acting.size() < pool->min_size) return; // can be no worse off than before - dout(20) << __func__ << " " << pp->first << " " << cur_up << "/" << cur_acting + dout(20) << __func__ << " " << pgid << " " << cur_up << "/" << cur_acting << " -> " << up << "/" << acting << ", priming " << cur_acting << dendl; - pending_inc.new_pg_temp[pp->first] = cur_acting; -} - -int OSDMonitor::prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd) -{ - dout(10) << __func__ << " osd." << osd << dendl; - int num = 0; - ceph::unordered_map >::iterator po = pg_map->pg_by_osd.find(osd); - if (po != pg_map->pg_by_osd.end()) { - for (set::iterator p = po->second.begin(); - p != po->second.end(); - ++p, ++num) { - ceph::unordered_map::iterator pp = pg_map->pg_stat.find(*p); - if (pp == pg_map->pg_stat.end()) - continue; - prime_pg_temp(next, pp); - } - } - return num; + pending_inc.new_pg_temp[pgid] = cur_acting; } - /** * @note receiving a transaction in this function gives a fair amount of * freedom to the service implementation if it does need it. It shouldn't. diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index ece88880e6a9..1e64e05978ab 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -192,9 +192,7 @@ private: void share_map_with_random_osd(); void maybe_prime_pg_temp(); - void prime_pg_temp(OSDMap& next, - ceph::unordered_map::iterator pp); - int prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd); + void prime_pg_temp(OSDMap& next, pg_t pgid); void update_logger();