]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: track pg creations by osd
authorSage Weil <sage@inktank.com>
Wed, 18 Jul 2012 21:54:11 +0000 (14:54 -0700)
committerSamuel Just <sam.just@inktank.com>
Fri, 20 Jul 2012 00:13:09 +0000 (17:13 -0700)
Track the pending pg creations by osd, and use a helper to send out that
messages.

Signed-off-by: Sage Weil <sage@inktank.com>
src/mon/PGMap.cc
src/mon/PGMap.h
src/mon/PGMonitor.cc
src/mon/PGMonitor.h

index 3ca710186dd828349310140b290a30d94f0a4911..accc1b73a200f83664b22f2e84b3c8ae3d76a72a 100644 (file)
@@ -288,8 +288,11 @@ void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s)
   num_pg_by_state[s.state]++;
   pg_pool_sum[pgid.pool()].add(s);
   pg_sum.add(s);
-  if (s.state & PG_STATE_CREATING)
+  if (s.state & PG_STATE_CREATING) {
     creating_pgs.insert(pgid);
+    if (s.acting.size())
+      creating_pgs_by_osd[s.acting[0]].insert(pgid);
+  }
 }
 
 void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s)
@@ -304,8 +307,11 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s)
     pg_pool_sum.erase(pgid.pool());
 
   pg_sum.sub(s);
-  if (s.state & PG_STATE_CREATING)
+  if (s.state & PG_STATE_CREATING) {
     creating_pgs.erase(pgid);
+    if (s.acting.size())
+      creating_pgs_by_osd[s.acting[0]].erase(pgid);
+  }
 }
 
 void PGMap::stat_osd_add(const osd_stat_t &s)
index affe4ff0b1c820ef8615fe4628942e0c09a50239..5748101125bc5ad88360ba474fcbf275f3df042d 100644 (file)
@@ -71,6 +71,7 @@ public:
   osd_stat_t osd_sum;
 
   set<pg_t> creating_pgs;   // lru: front = new additions, back = recently pinged
+  map<int,set<pg_t> > creating_pgs_by_osd;
 
   enum StuckPG {
     STUCK_INACTIVE,
index aa866bd735d81db9a1fec92be986e96cf030291a..d4c1a9cf5dbc9c0cccb3acb4902f526513f6088f 100644 (file)
@@ -759,7 +759,6 @@ void PGMonitor::send_pg_creates()
 {
   dout(10) << "send_pg_creates to " << pg_map.creating_pgs.size() << " pgs" << dendl;
 
-  map<int, MOSDPGCreate*> msg;
   utime_t now = ceph_clock_now(g_ceph_context);
   
   for (set<pg_t>::iterator p = pg_map.creating_pgs.begin();
@@ -767,42 +766,62 @@ void PGMonitor::send_pg_creates()
        p++) {
     pg_t pgid = *p;
     pg_t on = pgid;
-    if (pg_map.pg_stat[pgid].parent_split_bits)
-      on = pg_map.pg_stat[pgid].parent;
+    pg_stat_t& s = pg_map.pg_stat[pgid];
+    if (s.parent_split_bits)
+      on = s.parent;
     vector<int> acting;
     int nrep = mon->osdmon()->osdmap.pg_to_acting_osds(on, acting);
-    if (!nrep) {
+
+    if (s.acting.size())
+      pg_map.creating_pgs_by_osd[acting[0]].erase(pgid);
+    s.acting = acting;
+
+    // don't send creates for localized pgs
+    if (pgid.preferred() >= 0)
+      continue;
+
+    if (nrep) {
+      pg_map.creating_pgs_by_osd[acting[0]].insert(pgid);
+    } else {
       dout(20) << "send_pg_creates  " << pgid << " -> no osds in epoch "
               << mon->osdmon()->osdmap.get_epoch() << ", skipping" << dendl;
       continue;  // blarney!
     }
-    int osd = acting[0];
+  }
 
-    // don't send creates for localized pgs
-    if (pgid.preferred() >= 0)
-      continue;
+  for (map<int, set<pg_t> >::iterator p = pg_map.creating_pgs_by_osd.begin();
+       p != pg_map.creating_pgs_by_osd.end();
+       ++p) {
+    int osd = p->first;
 
     // throttle?
     if (last_sent_pg_create.count(osd) &&
        now - g_conf->mon_pg_create_interval < last_sent_pg_create[osd]) 
       continue;
       
-    dout(20) << "send_pg_creates  " << pgid << " -> osd." << osd 
-            << " in epoch " << pg_map.pg_stat[pgid].created << dendl;
-    if (msg.count(osd) == 0)
-      msg[osd] = new MOSDPGCreate(mon->osdmon()->osdmap.get_epoch());
-    msg[osd]->mkpg[pgid] = pg_create_t(pg_map.pg_stat[pgid].created,
-                                      pg_map.pg_stat[pgid].parent,
-                                      pg_map.pg_stat[pgid].parent_split_bits);
+    send_pg_creates(osd, NULL);
   }
+}
 
-  for (map<int, MOSDPGCreate*>::iterator p = msg.begin();
-       p != msg.end();
-       p++) {
-    dout(10) << "sending pg_create to osd." << p->first << dendl;
-    mon->messenger->send_message(p->second, mon->osdmon()->osdmap.get_inst(p->first));
-    last_sent_pg_create[p->first] = ceph_clock_now(g_ceph_context);
+void PGMonitor::send_pg_creates(int osd, Connection *con)
+{
+  map<int, set<pg_t> >::iterator p = pg_map.creating_pgs_by_osd.find(osd);
+  if (p == pg_map.creating_pgs_by_osd.end())
+    return;
+
+  dout(20) << "send_pg_creates osd." << osd << " pgs " << p->second << dendl;
+  MOSDPGCreate *m = new MOSDPGCreate(mon->osdmon()->osdmap.get_epoch());
+  for (set<pg_t>::iterator q = p->second.begin(); q != p->second.end(); ++q) {
+    m->mkpg[*q] = pg_create_t(pg_map.pg_stat[*q].created,
+                             pg_map.pg_stat[*q].parent,
+                             pg_map.pg_stat[*q].parent_split_bits);
   }
+
+  if (con)
+    mon->messenger->send_message(m, con);
+  else
+    mon->messenger->send_message(m, mon->osdmon()->osdmap.get_inst(osd));
+  last_sent_pg_create[osd] = ceph_clock_now(g_ceph_context);
 }
 
 bool PGMonitor::check_down_pgs()
index 21edb185e72c14189df1837d14abba4c90490480..563e6e9d9f8e987ff72a71692e2d5708762ec479 100644 (file)
@@ -96,6 +96,7 @@ private:
   bool register_new_pgs();
 
   void send_pg_creates();
+  void send_pg_creates(int osd, Connection *con);
 
   /**
    * check pgs for down primary osds