From: Sage Weil Date: Wed, 16 Sep 2015 15:00:57 +0000 (-0400) Subject: osd: fix pg stat reporting X-Git-Tag: v10.0.1~26^2~54 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d5a2f9a6c7e1eec2bae7facc670860729bc06408;p=ceph.git osd: fix pg stat reporting Signed-off-by: Sage Weil --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 25d86d8fecaa..f325f2ba1922 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1567,7 +1567,6 @@ OSD::OSD(CephContext *cct_, ObjectStore *store_, debug_drop_pg_create_duration(cct->_conf->osd_debug_drop_pg_create_duration), debug_drop_pg_create_left(-1), stats_ack_timeout(cct->_conf->osd_mon_ack_timeout), - outstanding_pg_stats(0), up_thru_wanted(0), up_thru_pending(0), requested_full_first(0), requested_full_last(0), @@ -3990,7 +3989,7 @@ void OSD::tick() bool report = false; utime_t now = ceph_clock_now(cct); pg_stat_queue_lock.Lock(); - if (outstanding_pg_stats && + if (!outstanding_pg_stats.empty() && (now - stats_ack_timeout) > last_pg_stats_ack) { dout(1) << __func__ << " mon hasn't acked PGStats in " << now - last_pg_stats_ack @@ -4001,11 +4000,13 @@ void OSD::tick() stats_ack_timeout = MAX(g_conf->osd_mon_ack_timeout, stats_ack_timeout * g_conf->osd_stats_ack_timeout_factor); + outstanding_pg_stats.clear(); } if (now - last_pg_stats_sent > cct->_conf->osd_mon_report_interval_max) { osd_stat_updated = true; report = true; - } else if (outstanding_pg_stats >= cct->_conf->osd_mon_report_max_in_flight) { + } else if ((int)outstanding_pg_stats.size() >= + cct->_conf->osd_mon_report_max_in_flight) { dout(20) << __func__ << " have max " << outstanding_pg_stats << " stats updates in flight" << dendl; } else { @@ -4020,9 +4021,9 @@ void OSD::tick() } pg_stat_queue_lock.Unlock(); - if (reset) + if (reset) { monc->reopen_session(); - else if (report) { + } else if (report) { last_mon_report = now; // do any pending reports @@ -4372,11 +4373,6 @@ void OSD::ms_handle_connect(Connection *con) return; dout(10) 
<< "ms_handle_connect on mon" << dendl; - // reset pg stats state - pg_stat_queue_lock.Lock(); - outstanding_pg_stats = 0; - pg_stat_queue_lock.Unlock(); - if (is_booting()) { start_boot(); } else { @@ -4776,7 +4772,8 @@ void OSD::send_pg_stats(const utime_t &now) had_for -= had_map_since; MPGStats *m = new MPGStats(monc->get_fsid(), osdmap->get_epoch(), had_for); - m->set_tid(++pg_stat_tid); + uint64_t tid = ++pg_stat_tid; + m->set_tid(tid); m->osd_stat = cur_stat; xlist::iterator p = pg_stat_queue.begin(); @@ -4800,12 +4797,11 @@ void OSD::send_pg_stats(const utime_t &now) pg->pg_stats_publish_lock.Unlock(); } - if (!outstanding_pg_stats) { + if (!outstanding_pg_stats.empty()) { last_pg_stats_ack = ceph_clock_now(cct); } - ++outstanding_pg_stats; - dout(20) << __func__ << " " << outstanding_pg_stats << " updates pending" - << dendl; + outstanding_pg_stats.insert(tid); + dout(20) << __func__ << " updates pending: " << outstanding_pg_stats << dendl; monc->send_mon_message(m); } @@ -4822,6 +4818,10 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack) return; } + // NOTE: we may get replies from a previous mon even while + // outstanding_pg_stats is empty if reconnecting races with replies + // in flight. 
+ pg_stat_queue_lock.Lock(); last_pg_stats_ack = ceph_clock_now(cct); @@ -4863,13 +4863,8 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack) } } - assert(outstanding_pg_stats > 0); - --outstanding_pg_stats; - if (!pg_stat_queue.size()) { - assert(outstanding_pg_stats == 0); - } - dout(20) << __func__ << " " << outstanding_pg_stats << " updates pending" - << dendl; + outstanding_pg_stats.erase(ack->get_tid()); + dout(20) << __func__ << " still pending: " << outstanding_pg_stats << dendl; pg_stat_queue_lock.Unlock(); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index f829c58ea9df..8aef344305ed 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1971,7 +1971,7 @@ protected: */ utime_t last_pg_stats_ack; float stats_ack_timeout; - int outstanding_pg_stats; // how many stat updates haven't been acked yet + set<uint64_t> outstanding_pg_stats; // how many stat updates haven't been acked yet // -- boot -- void start_boot();