From: Sage Weil Date: Wed, 21 Jan 2015 03:08:19 +0000 (-0800) Subject: mon: be more careful about when we prime all pgs X-Git-Tag: v9.0.1~34^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4fcbd1464da97b098f2e27100f3288668324728d;p=ceph.git mon: be more careful about when we prime all pgs If we mark down or weight down, pgs will go *away* from an osd and we can focus just on the ones that are there now. If we mark up or weight up, we don't know where pgs will come from, and need to scan everything. Signed-off-by: Sage Weil --- diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 429047628e9..8a1c9244f9b 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -622,44 +622,84 @@ void OSDMonitor::create_pending() void OSDMonitor::maybe_prime_pg_temp() { + bool all = false; if (pending_inc.crush.length()) { - dout(10) << __func__ << " new crush map" << dendl; - OSDMap next; - next.deepish_copy_from(osdmap); - next.apply_incremental(pending_inc); - prime_pg_temp(next, &mon->pgmon()->pg_map); - return; + dout(10) << __func__ << " new crush map, all" << dendl; + all = true; + } + + if (!pending_inc.new_up_client.empty()) { + dout(10) << __func__ << " new up osds, all" << dendl; + all = true; } // check for interesting OSDs set osds; for (map::iterator p = pending_inc.new_state.begin(); - p != pending_inc.new_state.end(); + !all && p != pending_inc.new_state.end(); ++p) { - if (p->second & CEPH_OSD_UP) { + if ((p->second & CEPH_OSD_UP) && + osdmap.is_up(p->first)) { osds.insert(p->first); } } for (map::iterator p = pending_inc.new_weight.begin(); - p != pending_inc.new_weight.end(); + !all && p != pending_inc.new_weight.end(); ++p) { - osds.insert(p->first); + if (p->second < osdmap.get_weight(p->first)) { + // weight reduction + osds.insert(p->first); + } else { + dout(10) << __func__ << " osd." << p->first << " weight increase, all" + << dendl; + all = true; + } } - if (!osds.empty()) { + + if (!all && osds.empty()) + return; + + OSDMap next; + next.deepish_copy_from(osdmap); + next.apply_incremental(pending_inc); + + PGMap *pg_map = &mon->pgmon()->pg_map; + + utime_t stop = ceph_clock_now(NULL); + stop += g_conf->mon_osd_prime_pg_temp_max_time; + int chunk = 1000; + int n = chunk; + + if (all) { + for (ceph::unordered_map::iterator pp = + pg_map->pg_stat.begin(); + pp != pg_map->pg_stat.end(); + ++pp) { + prime_pg_temp(next, pp); + if (--n <= 0) { + n = chunk; + if (ceph_clock_now(NULL) > stop) { + dout(10) << __func__ << " consumed more than " + << g_conf->mon_osd_prime_pg_temp_max_time + << " seconds, stopping" + << dendl; + break; + } + } + } + } else { dout(10) << __func__ << " " << osds.size() << " interesting osds" << dendl; - OSDMap next; - next.deepish_copy_from(osdmap); - next.apply_incremental(pending_inc); - utime_t stop = ceph_clock_now(NULL); - stop += g_conf->mon_osd_prime_pg_temp_max_time; for (set::iterator p = osds.begin(); p != osds.end(); ++p) { - prime_pg_temp(next, &mon->pgmon()->pg_map, *p); - if (ceph_clock_now(NULL) > stop) { - dout(10) << __func__ << " consumed more than " - << g_conf->mon_osd_prime_pg_temp_max_time - << " seconds, stopping" - << dendl; - break; + n -= prime_pg_temp(next, pg_map, *p); + if (--n <= 0) { + n = chunk; + if (ceph_clock_now(NULL) > stop) { + dout(10) << __func__ << " consumed more than " + << g_conf->mon_osd_prime_pg_temp_max_time + << " seconds, stopping" + << dendl; + break; + } } } } @@ -689,43 +729,22 @@ void OSDMonitor::prime_pg_temp(OSDMap& next, pending_inc.new_pg_temp[pp->first] = cur_acting; } -void OSDMonitor::prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd) +int OSDMonitor::prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd) { dout(10) << __func__ << " osd." << osd << dendl; + int num = 0; ceph::unordered_map >::iterator po = pg_map->pg_by_osd.find(osd); if (po != pg_map->pg_by_osd.end()) { for (set::iterator p = po->second.begin(); p != po->second.end(); - ++p) { + ++p, ++num) { ceph::unordered_map::iterator pp = pg_map->pg_stat.find(*p); if (pp == pg_map->pg_stat.end()) continue; prime_pg_temp(next, pp); } } -} - -void OSDMonitor::prime_pg_temp(OSDMap& next, PGMap *pg_map) -{ - dout(10) << __func__ << dendl; - utime_t stop = ceph_clock_now(NULL); - stop += g_conf->mon_osd_prime_pg_temp_max_time; - int n = 0; - for (ceph::unordered_map::iterator pp = pg_map->pg_stat.begin(); - pp != pg_map->pg_stat.end(); - ++pp) { - prime_pg_temp(next, pp); - if (++n == 1000) { - n = 0; - if (ceph_clock_now(NULL) > stop) { - dout(10) << __func__ << " consumed more than " - << g_conf->mon_osd_prime_pg_temp_max_time - << " seconds, stopping" - << dendl; - break; - } - } - } + return num; } diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index ea6926b7df6..484eb42d220 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -203,8 +203,7 @@ private: void maybe_prime_pg_temp(); void prime_pg_temp(OSDMap& next, ceph::unordered_map::iterator pp); - void prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd); - void prime_pg_temp(OSDMap& next, PGMap *pg_map); + int prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd); void update_logger();