git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: be more careful about when we prime all pgs
author: Sage Weil <sage@redhat.com>
Wed, 21 Jan 2015 03:08:19 +0000 (19:08 -0800)
committer: Sage Weil <sage@redhat.com>
Wed, 21 Jan 2015 03:20:24 +0000 (19:20 -0800)
If we mark an osd down or reduce its weight, pgs will only move *away* from
that osd, so we can focus on just the pgs that are mapped there now.

If we mark an osd up or increase its weight, we don't know which pgs will
move to it, so we need to scan every pg.

Signed-off-by: Sage Weil <sage@redhat.com>
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h

index 429047628e9eac67cd3aa96f45f4befed342f34c..8a1c9244f9b38480e2495c307d5ff12a275b735b 100644 (file)
@@ -622,44 +622,84 @@ void OSDMonitor::create_pending()
 
 void OSDMonitor::maybe_prime_pg_temp()
 {
+  bool all = false;
   if (pending_inc.crush.length()) {
-    dout(10) << __func__ << " new crush map" << dendl;
-    OSDMap next;
-    next.deepish_copy_from(osdmap);
-    next.apply_incremental(pending_inc);
-    prime_pg_temp(next, &mon->pgmon()->pg_map);
-    return;
+    dout(10) << __func__ << " new crush map, all" << dendl;
+    all = true;
+  }
+
+  if (!pending_inc.new_up_client.empty()) {
+    dout(10) << __func__ << " new up osds, all" << dendl;
+    all = true;
   }
 
   // check for interesting OSDs
   set<int> osds;
   for (map<int32_t,uint8_t>::iterator p = pending_inc.new_state.begin();
-       p != pending_inc.new_state.end();
+       !all && p != pending_inc.new_state.end();
        ++p) {
-    if (p->second & CEPH_OSD_UP) {
+    if ((p->second & CEPH_OSD_UP) &&
+       osdmap.is_up(p->first)) {
       osds.insert(p->first);
     }
   }
   for (map<int32_t,uint32_t>::iterator p = pending_inc.new_weight.begin();
-       p != pending_inc.new_weight.end();
+       !all && p != pending_inc.new_weight.end();
        ++p) {
-    osds.insert(p->first);
+    if (p->second < osdmap.get_weight(p->first)) {
+      // weight reduction
+      osds.insert(p->first);
+    } else {
+      dout(10) << __func__ << " osd." << p->first << " weight increase, all"
+              << dendl;
+      all = true;
+    }
   }
-  if (!osds.empty()) {
+
+  if (!all && osds.empty())
+    return;
+
+  OSDMap next;
+  next.deepish_copy_from(osdmap);
+  next.apply_incremental(pending_inc);
+
+  PGMap *pg_map = &mon->pgmon()->pg_map;
+
+  utime_t stop = ceph_clock_now(NULL);
+  stop += g_conf->mon_osd_prime_pg_temp_max_time;
+  int chunk = 1000;
+  int n = chunk;
+
+  if (all) {
+    for (ceph::unordered_map<pg_t, pg_stat_t>::iterator pp =
+          pg_map->pg_stat.begin();
+        pp != pg_map->pg_stat.end();
+        ++pp) {
+      prime_pg_temp(next, pp);
+      if (--n <= 0) {
+       n = chunk;
+       if (ceph_clock_now(NULL) > stop) {
+         dout(10) << __func__ << " consumed more than "
+                  << g_conf->mon_osd_prime_pg_temp_max_time
+                  << " seconds, stopping"
+                  << dendl;
+         break;
+       }
+      }
+    }
+  } else {
     dout(10) << __func__ << " " << osds.size() << " interesting osds" << dendl;
-    OSDMap next;
-    next.deepish_copy_from(osdmap);
-    next.apply_incremental(pending_inc);
-    utime_t stop = ceph_clock_now(NULL);
-    stop += g_conf->mon_osd_prime_pg_temp_max_time;
     for (set<int>::iterator p = osds.begin(); p != osds.end(); ++p) {
-      prime_pg_temp(next, &mon->pgmon()->pg_map, *p);
-      if (ceph_clock_now(NULL) > stop) {
-       dout(10) << __func__ << " consumed more than "
-                << g_conf->mon_osd_prime_pg_temp_max_time
-                << " seconds, stopping"
-                << dendl;
-       break;
+      n -= prime_pg_temp(next, pg_map, *p);
+      if (--n <= 0) {
+       n = chunk;
+       if (ceph_clock_now(NULL) > stop) {
+         dout(10) << __func__ << " consumed more than "
+                  << g_conf->mon_osd_prime_pg_temp_max_time
+                  << " seconds, stopping"
+                  << dendl;
+         break;
+       }
       }
     }
   }
@@ -689,43 +729,22 @@ void OSDMonitor::prime_pg_temp(OSDMap& next,
   pending_inc.new_pg_temp[pp->first] = cur_acting;
 }
 
-void OSDMonitor::prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd)
+int OSDMonitor::prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd)
 {
   dout(10) << __func__ << " osd." << osd << dendl;
+  int num = 0;
   ceph::unordered_map<int, set<pg_t> >::iterator po = pg_map->pg_by_osd.find(osd);
   if (po != pg_map->pg_by_osd.end()) {
     for (set<pg_t>::iterator p = po->second.begin();
         p != po->second.end();
-        ++p) {
+        ++p, ++num) {
       ceph::unordered_map<pg_t, pg_stat_t>::iterator pp = pg_map->pg_stat.find(*p);
       if (pp == pg_map->pg_stat.end())
        continue;
       prime_pg_temp(next, pp);
     }
   }
-}
-
-void OSDMonitor::prime_pg_temp(OSDMap& next, PGMap *pg_map)
-{
-  dout(10) << __func__ << dendl;
-  utime_t stop = ceph_clock_now(NULL);
-  stop += g_conf->mon_osd_prime_pg_temp_max_time;
-  int n = 0;
-  for (ceph::unordered_map<pg_t, pg_stat_t>::iterator pp = pg_map->pg_stat.begin();
-       pp != pg_map->pg_stat.end();
-       ++pp) {
-    prime_pg_temp(next, pp);
-    if (++n == 1000) {
-      n = 0;
-      if (ceph_clock_now(NULL) > stop) {
-       dout(10) << __func__ << " consumed more than "
-                << g_conf->mon_osd_prime_pg_temp_max_time
-                << " seconds, stopping"
-                << dendl;
-       break;
-      }
-    }
-  }
+  return num;
 }
 
 
index ea6926b7df697aa90c73b42072d0594fa376c242..484eb42d220223308a2be05bb83c7ff585348de5 100644 (file)
@@ -203,8 +203,7 @@ private:
   void maybe_prime_pg_temp();
   void prime_pg_temp(OSDMap& next,
                     ceph::unordered_map<pg_t, pg_stat_t>::iterator pp);
-  void prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd);
-  void prime_pg_temp(OSDMap& next, PGMap *pg_map);
+  int prime_pg_temp(OSDMap& next, PGMap *pg_map, int osd);
 
   void update_logger();