From: Sage Weil
Date: Tue, 1 Jul 2014 19:42:19 +0000 (-0700)
Subject: mon: clear osd request queue latency info on down or up
X-Git-Tag: v0.84~160^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9ce5ff96a1032be49d70b185311c47e8cfda4048;p=ceph.git

mon: clear osd request queue latency info on down or up

It doesn't make sense to warn about slow requests on OSDs that are
down; clear those stats in the monitor when we see an OSD go down
(or up).

Fixes: #8692
Signed-off-by: Sage Weil
---

diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h
index 99ce521b288e..560290b7533e 100644
--- a/src/mon/PGMap.h
+++ b/src/mon/PGMap.h
@@ -84,6 +84,20 @@ public:
       // 0 the stats for the osd
       osd_stat_updates[osd] = osd_stat_t();
     }
+    void stat_osd_down_up(int32_t osd, PGMap& pg_map) {
+      // 0 the op_queue_age_hist for this osd
+      map<int32_t,osd_stat_t>::iterator p = osd_stat_updates.find(osd);
+      if (p != osd_stat_updates.end()) {
+        p->second.op_queue_age_hist.clear();
+        return;
+      }
+      ceph::unordered_map<int32_t,osd_stat_t>::iterator q =
+        pg_map.osd_stat.find(osd);
+      if (q != pg_map.osd_stat.end()) {
+        osd_stat_t& t = osd_stat_updates[osd] = q->second;
+        t.op_queue_age_hist.clear();
+      }
+    }
     void rm_stat(int32_t osd) {
       osd_stat_rm.insert(osd);
       osd_epochs.erase(osd);
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index 26770e2d9763..3c41112f0687 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -910,6 +910,11 @@ void PGMonitor::check_osd_map(epoch_t epoch)
         if (report != last_osd_report.end()) {
           last_osd_report.erase(report);
         }
+
+        // clear out osd_stat slow request histogram
+        dout(20) << __func__ << " clearing osd." << p->first
+                 << " request histogram" << dendl;
+        pending_inc.stat_osd_down_up(p->first, pg_map);
       }
 
       if (p->second & CEPH_OSD_EXISTS) {