From 7f58b9beee10a829c3ebacdb51d48224789d3e14 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 18 Jul 2012 14:54:11 -0700 Subject: [PATCH] mon: track pg creations by osd Track the pending pg creations by osd, and use a helper to send out that messages. Signed-off-by: Sage Weil --- src/mon/PGMap.cc | 10 ++++++-- src/mon/PGMap.h | 1 + src/mon/PGMonitor.cc | 61 +++++++++++++++++++++++++++++--------------- src/mon/PGMonitor.h | 1 + 4 files changed, 50 insertions(+), 23 deletions(-) diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 3ca710186dd82..accc1b73a200f 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -288,8 +288,11 @@ void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s) num_pg_by_state[s.state]++; pg_pool_sum[pgid.pool()].add(s); pg_sum.add(s); - if (s.state & PG_STATE_CREATING) + if (s.state & PG_STATE_CREATING) { creating_pgs.insert(pgid); + if (s.acting.size()) + creating_pgs_by_osd[s.acting[0]].insert(pgid); + } } void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s) @@ -304,8 +307,11 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s) pg_pool_sum.erase(pgid.pool()); pg_sum.sub(s); - if (s.state & PG_STATE_CREATING) + if (s.state & PG_STATE_CREATING) { creating_pgs.erase(pgid); + if (s.acting.size()) + creating_pgs_by_osd[s.acting[0]].erase(pgid); + } } void PGMap::stat_osd_add(const osd_stat_t &s) diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index affe4ff0b1c82..5748101125bc5 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -71,6 +71,7 @@ public: osd_stat_t osd_sum; set creating_pgs; // lru: front = new additions, back = recently pinged + map > creating_pgs_by_osd; enum StuckPG { STUCK_INACTIVE, diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index aa866bd735d81..d4c1a9cf5dbc9 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -759,7 +759,6 @@ void PGMonitor::send_pg_creates() { dout(10) << "send_pg_creates to " << pg_map.creating_pgs.size() << " pgs" << dendl; - map msg; utime_t now = ceph_clock_now(g_ceph_context); for (set::iterator p = pg_map.creating_pgs.begin(); @@ -767,42 +766,62 @@ void PGMonitor::send_pg_creates() p++) { pg_t pgid = *p; pg_t on = pgid; - if (pg_map.pg_stat[pgid].parent_split_bits) - on = pg_map.pg_stat[pgid].parent; + pg_stat_t& s = pg_map.pg_stat[pgid]; + if (s.parent_split_bits) + on = s.parent; vector acting; int nrep = mon->osdmon()->osdmap.pg_to_acting_osds(on, acting); - if (!nrep) { + + if (s.acting.size()) + pg_map.creating_pgs_by_osd[acting[0]].erase(pgid); + s.acting = acting; + + // don't send creates for localized pgs + if (pgid.preferred() >= 0) + continue; + + if (nrep) { + pg_map.creating_pgs_by_osd[acting[0]].insert(pgid); + } else { dout(20) << "send_pg_creates " << pgid << " -> no osds in epoch " << mon->osdmon()->osdmap.get_epoch() << ", skipping" << dendl; continue; // blarney! } - int osd = acting[0]; + } - // don't send creates for localized pgs - if (pgid.preferred() >= 0) - continue; + for (map >::iterator p = pg_map.creating_pgs_by_osd.begin(); + p != pg_map.creating_pgs_by_osd.end(); + ++p) { + int osd = p->first; // throttle? if (last_sent_pg_create.count(osd) && now - g_conf->mon_pg_create_interval < last_sent_pg_create[osd]) continue; - dout(20) << "send_pg_creates " << pgid << " -> osd." << osd - << " in epoch " << pg_map.pg_stat[pgid].created << dendl; - if (msg.count(osd) == 0) - msg[osd] = new MOSDPGCreate(mon->osdmon()->osdmap.get_epoch()); - msg[osd]->mkpg[pgid] = pg_create_t(pg_map.pg_stat[pgid].created, - pg_map.pg_stat[pgid].parent, - pg_map.pg_stat[pgid].parent_split_bits); + send_pg_creates(osd, NULL); } +} - for (map::iterator p = msg.begin(); - p != msg.end(); - p++) { - dout(10) << "sending pg_create to osd." << p->first << dendl; - mon->messenger->send_message(p->second, mon->osdmon()->osdmap.get_inst(p->first)); - last_sent_pg_create[p->first] = ceph_clock_now(g_ceph_context); +void PGMonitor::send_pg_creates(int osd, Connection *con) +{ + map >::iterator p = pg_map.creating_pgs_by_osd.find(osd); + if (p == pg_map.creating_pgs_by_osd.end()) + return; + + dout(20) << "send_pg_creates osd." << osd << " pgs " << p->second << dendl; + MOSDPGCreate *m = new MOSDPGCreate(mon->osdmon()->osdmap.get_epoch()); + for (set::iterator q = p->second.begin(); q != p->second.end(); ++q) { + m->mkpg[*q] = pg_create_t(pg_map.pg_stat[*q].created, + pg_map.pg_stat[*q].parent, + pg_map.pg_stat[*q].parent_split_bits); } + + if (con) + mon->messenger->send_message(m, con); + else + mon->messenger->send_message(m, mon->osdmon()->osdmap.get_inst(osd)); + last_sent_pg_create[osd] = ceph_clock_now(g_ceph_context); } bool PGMonitor::check_down_pgs() diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h index 21edb185e72c1..563e6e9d9f8e9 100644 --- a/src/mon/PGMonitor.h +++ b/src/mon/PGMonitor.h @@ -96,6 +96,7 @@ private: bool register_new_pgs(); void send_pg_creates(); + void send_pg_creates(int osd, Connection *con); /** * check pgs for down primary osds -- 2.39.5