From dec936923fa7c9b15caea7ecffcf230d80fc1f85 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 19 Jul 2012 16:47:23 -0700 Subject: [PATCH] osd/mon: subscribe (onetime) to pg creations on connect Ask the monitor for pending pg creations each time we connect. Normally, this is a freebie check. If there are pending creations, though, it ensures that the OSD finds out about them even if the original lame broadcast didn't reach it. Specifically: - osd is hunting for a monitor, but isn't yet connected - new pgs are created - send_pg_creates() sends out create messages, but osd doesn't get it - osd finally connects to a mon Fixes: #2151 (tho the bug description is bad) Signed-off-by: Sage Weil Reviewed-by: Samuel Just --- src/mon/Monitor.cc | 4 ++++ src/mon/PGMonitor.cc | 19 +++++++++++++++---- src/mon/PGMonitor.h | 2 ++ src/osd/OSD.cc | 3 +++ 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 1db0a257e4ab..c415dbf6031c 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -1817,6 +1817,10 @@ void Monitor::handle_subscribe(MMonSubscribe *m) if ((int)s->caps.check_privileges(PAXOS_OSDMAP, MON_CAP_R)) { osdmon()->check_sub(s->sub_map["osdmap"]); } + } else if (p->first == "osd_pg_creates") { + if ((int)s->caps.check_privileges(PAXOS_OSDMAP, MON_CAP_W)) { + pgmon()->check_sub(s->sub_map["osd_pg_creates"]); + } } else if (p->first == "monmap") { check_sub(s->sub_map["monmap"]); } else if (logmon()->sub_name_to_id(p->first) >= 0) { diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index d4c1a9cf5dbc..0f0d52025ee3 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -773,7 +773,7 @@ void PGMonitor::send_pg_creates() int nrep = mon->osdmon()->osdmap.pg_to_acting_osds(on, acting); if (s.acting.size()) - pg_map.creating_pgs_by_osd[acting[0]].erase(pgid); + pg_map.creating_pgs_by_osd[s.acting[0]].erase(pgid); s.acting = acting; // don't send creates for localized pgs @@ -799,7 +799,8 @@ void 
PGMonitor::send_pg_creates() now - g_conf->mon_pg_create_interval < last_sent_pg_create[osd]) continue; - send_pg_creates(osd, NULL); + if (mon->osdmon()->osdmap.is_up(osd)) + send_pg_creates(osd, NULL); } } @@ -817,10 +818,12 @@ void PGMonitor::send_pg_creates(int osd, Connection *con) pg_map.pg_stat[*q].parent_split_bits); } - if (con) + if (con) { mon->messenger->send_message(m, con); - else + } else { + assert(mon->osdmon()->osdmap.is_up(osd)); mon->messenger->send_message(m, mon->osdmon()->osdmap.get_inst(osd)); + } last_sent_pg_create[osd] = ceph_clock_now(g_ceph_context); } @@ -1362,3 +1365,11 @@ int PGMonitor::dump_stuck_pg_stats(ostream& ss, ss << "ok"; return 0; } + +void PGMonitor::check_sub(Subscription *sub) +{ + if (sub->type == "osd_pg_creates") { + send_pg_creates(sub->session->inst.name.num(), + sub->session->con); + } +} diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h index 563e6e9d9f8e..6ca3c0c4f6a1 100644 --- a/src/mon/PGMonitor.h +++ b/src/mon/PGMonitor.h @@ -137,6 +137,8 @@ public: list > *detail, const set& s, const char *desc, health_status_t sev) const; + void check_sub(Subscription *sub); + private: // no copying allowed PGMonitor(const PGMonitor &rhs); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 9a59a84a2240..6cf9ab75cb29 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2013,6 +2013,9 @@ void OSD::ms_handle_connect(Connection *con) service.send_pg_temp(); send_failures(); send_pg_stats(ceph_clock_now(g_ceph_context)); + + monc->sub_want("osd_pg_creates", 0, CEPH_SUBSCRIBE_ONETIME); + monc->renew_subs(); } } } -- 2.47.3