From 3b75db80d851d21fc57054ef7b045ea46b4f2223 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=D0=9A=D0=BE=D1=80=D0=B5=D0=BD=D0=B1=D0=B5=D1=80=D0=B3=20?= =?utf8?q?=D0=9C=D0=B0=D1=80=D0=BA?= Date: Wed, 29 Nov 2017 14:43:30 +0500 Subject: [PATCH] mgr: increase time resolution of Commit/Apply OSD latencies. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Increase precision/resolution of time measurements in performance monitoring. Affects only Commit/Apply OSD latencies. Signed-off-by: Коренберг Марк --- src/common/perf_counters.cc | 4 ++-- src/common/perf_counters.h | 2 +- src/mon/PGMap.cc | 4 ++-- src/os/bluestore/BlueStore.cc | 8 ++++---- src/os/bluestore/BlueStore.h | 8 ++++---- src/os/filestore/FileStore.cc | 8 ++++---- src/os/filestore/FileStore.h | 8 ++++---- src/osd/osd_types.cc | 36 ++++++++++++++++++++++++--------- src/osd/osd_types.h | 20 +++++++++--------- src/pybind/mgr/zabbix/module.py | 20 +++++++++--------- 10 files changed, 67 insertions(+), 51 deletions(-) diff --git a/src/common/perf_counters.cc b/src/common/perf_counters.cc index 737bbcc3dc851..922b47372e489 100644 --- a/src/common/perf_counters.cc +++ b/src/common/perf_counters.cc @@ -312,7 +312,7 @@ void PerfCounters::hinc(int idx, int64_t x, int64_t y) data.histogram->inc(x, y); } -pair PerfCounters::get_tavg_ms(int idx) const +pair PerfCounters::get_tavg_ns(int idx) const { if (!m_cct->_conf->perf) return make_pair(0, 0); @@ -325,7 +325,7 @@ pair PerfCounters::get_tavg_ms(int idx) const if (!(data.type & PERFCOUNTER_LONGRUNAVG)) return make_pair(0, 0); pair a = data.read_avg(); - return make_pair(a.second, a.first / 1000000ull); + return make_pair(a.second, a.first); } void PerfCounters::reset() diff --git a/src/common/perf_counters.h b/src/common/perf_counters.h index 1aec9cc05f588..4f1a50887d49c 100644 --- a/src/common/perf_counters.h +++ b/src/common/perf_counters.h @@ -241,7 +241,7 @@ public: const std::string &counter = "") { dump_formatted_generic(f, schema, true, counter); } - pair get_tavg_ms(int idx) const; + pair get_tavg_ns(int idx) const; const std::string& get_name() const; void set_name(std::string s) { diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 884e99d11f402..841af9c56b968 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -1866,8 +1866,8 @@ void PGMap::print_osd_perf_stats(std::ostream *ss) const i != osd_stat.end(); ++i) { tab << i->first; - tab << i->second.os_perf_stat.os_commit_latency; - tab << i->second.os_perf_stat.os_apply_latency; + tab << i->second.os_perf_stat.os_commit_latency_ns / 1000000ull; + tab << i->second.os_perf_stat.os_apply_latency_ns / 1000000ull; tab << TextTable::endrow; } (*ss) << tab; diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b65d5cf3d5929..0a379ff7a0504 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -8269,11 +8269,11 @@ void BlueStore::_txc_write_nodes(TransContext *txc, KeyValueDB::Transaction t) void BlueStore::BSPerfTracker::update_from_perfcounters( PerfCounters &logger) { - os_commit_latency.consume_next( - logger.get_tavg_ms( + os_commit_latency_ns.consume_next( + logger.get_tavg_ns( l_bluestore_commit_lat)); - os_apply_latency.consume_next( - logger.get_tavg_ms( + os_apply_latency_ns.consume_next( + logger.get_tavg_ns( l_bluestore_commit_lat)); } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 32f4cde3d9cd3..a30c4e61ea652 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2387,13 +2387,13 @@ public: } struct BSPerfTracker { - PerfCounters::avg_tracker os_commit_latency; - PerfCounters::avg_tracker os_apply_latency; + PerfCounters::avg_tracker os_commit_latency_ns; + PerfCounters::avg_tracker os_apply_latency_ns; objectstore_perf_stat_t get_cur_stats() const { objectstore_perf_stat_t ret; - ret.os_commit_latency = os_commit_latency.current_avg(); - ret.os_apply_latency = os_apply_latency.current_avg(); + ret.os_commit_latency_ns = os_commit_latency_ns.current_avg(); + ret.os_apply_latency_ns = os_apply_latency_ns.current_avg(); return ret; } diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc index 33faa2371f85e..55fc10b8dd836 100644 --- a/src/os/filestore/FileStore.cc +++ b/src/os/filestore/FileStore.cc @@ -145,11 +145,11 @@ int FileStore::get_block_device_fsid(CephContext* cct, const string& path, void FileStore::FSPerfTracker::update_from_perfcounters( PerfCounters &logger) { - os_commit_latency.consume_next( - logger.get_tavg_ms( + os_commit_latency_ns.consume_next( + logger.get_tavg_ns( l_filestore_journal_latency)); - os_apply_latency.consume_next( - logger.get_tavg_ms( + os_apply_latency_ns.consume_next( + logger.get_tavg_ns( l_filestore_apply_latency)); } diff --git a/src/os/filestore/FileStore.h b/src/os/filestore/FileStore.h index d5711df039ea5..002bbf9b31466 100644 --- a/src/os/filestore/FileStore.h +++ b/src/os/filestore/FileStore.h @@ -129,13 +129,13 @@ public: static int get_block_device_fsid(CephContext* cct, const string& path, uuid_d *fsid); struct FSPerfTracker { - PerfCounters::avg_tracker os_commit_latency; - PerfCounters::avg_tracker os_apply_latency; + PerfCounters::avg_tracker os_commit_latency_ns; + PerfCounters::avg_tracker os_apply_latency_ns; objectstore_perf_stat_t get_cur_stats() const { objectstore_perf_stat_t ret; - ret.os_commit_latency = os_commit_latency.current_avg(); - ret.os_apply_latency = os_apply_latency.current_avg(); + ret.os_commit_latency_ns = os_commit_latency_ns.current_avg(); + ret.os_apply_latency_ns = os_apply_latency_ns.current_avg(); return ret; } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 42864574bd111..b5219f569b624 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -276,23 +276,39 @@ void request_redirect_t::generate_test_instances(list& o) void objectstore_perf_stat_t::dump(Formatter *f) const { - f->dump_unsigned("commit_latency_ms", os_commit_latency); - f->dump_unsigned("apply_latency_ms", os_apply_latency); + // *_ms values just for compatibility. + f->dump_float("commit_latency_ms", os_commit_latency_ns / 1000000.0); + f->dump_float("apply_latency_ms", os_apply_latency_ns / 1000000.0); + f->dump_unsigned("commit_latency_ns", os_commit_latency_ns); + f->dump_unsigned("apply_latency_ns", os_apply_latency_ns); } void objectstore_perf_stat_t::encode(bufferlist &bl) const { - ENCODE_START(1, 1, bl); - encode(os_commit_latency, bl); - encode(os_apply_latency, bl); + uint32_t commit_latency_ms = os_commit_latency_ns / 1000000; + uint32_t apply_latency_ms = os_apply_latency_ns / 1000000; + ENCODE_START(2, 1, bl); + encode(commit_latency_ms, bl); // for compatibility with older monitor. + encode(apply_latency_ms, bl); // for compatibility with older monitor. + encode(os_commit_latency_ns, bl); + encode(os_apply_latency_ns, bl); ENCODE_FINISH(bl); } void objectstore_perf_stat_t::decode(bufferlist::iterator &bl) { - DECODE_START(1, bl); - decode(os_commit_latency, bl); - decode(os_apply_latency, bl); + DECODE_START(2, bl); + uint32_t commit_latency_ms; + uint32_t apply_latency_ms; + decode(commit_latency_ms, bl); + decode(apply_latency_ms, bl); + if (struct_v >= 2) { + decode(os_commit_latency_ns, bl); + decode(os_apply_latency_ns, bl); + } else { + os_commit_latency_ns = commit_latency_ms * (uint64_t) 1000000; + os_apply_latency_ns = apply_latency_ms * (uint64_t) 1000000; + } DECODE_FINISH(bl); } @@ -300,8 +316,8 @@ void objectstore_perf_stat_t::generate_test_instances(std::listos_commit_latency = 20; - o.back()->os_apply_latency = 30; + o.back()->os_commit_latency_ns = 20000000; + o.back()->os_apply_latency_ns = 30000000; } // -- osd_stat_t -- diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index becea7d254593..5c1f51115980e 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -883,25 +883,25 @@ inline ostream& operator<<(ostream& out, const eversion_t& e) { * current perf information about the osd */ struct objectstore_perf_stat_t { - // cur_op_latency is in ms since double add/sub are not associative - uint32_t os_commit_latency; - uint32_t os_apply_latency; + // cur_op_latency is in ns since double add/sub are not associative + uint64_t os_commit_latency_ns; + uint64_t os_apply_latency_ns; objectstore_perf_stat_t() : - os_commit_latency(0), os_apply_latency(0) {} + os_commit_latency_ns(0), os_apply_latency_ns(0) {} bool operator==(const objectstore_perf_stat_t &r) const { - return os_commit_latency == r.os_commit_latency && - os_apply_latency == r.os_apply_latency; + return os_commit_latency_ns == r.os_commit_latency_ns && + os_apply_latency_ns == r.os_apply_latency_ns; } void add(const objectstore_perf_stat_t &o) { - os_commit_latency += o.os_commit_latency; - os_apply_latency += o.os_apply_latency; + os_commit_latency_ns += o.os_commit_latency_ns; + os_apply_latency_ns += o.os_apply_latency_ns; } void sub(const objectstore_perf_stat_t &o) { - os_commit_latency -= o.os_commit_latency; - os_apply_latency -= o.os_apply_latency; + os_commit_latency_ns -= o.os_commit_latency_ns; + os_apply_latency_ns -= o.os_apply_latency_ns; } void dump(Formatter *f) const; void encode(bufferlist &bl) const; diff --git a/src/pybind/mgr/zabbix/module.py b/src/pybind/mgr/zabbix/module.py index f95db56c17c85..bd0ee1845e793 100644 --- a/src/pybind/mgr/zabbix/module.py +++ b/src/pybind/mgr/zabbix/module.py @@ -170,16 +170,16 @@ class Module(MgrModule): data['num_osd_in'] = num_in osd_fill = list() - osd_apply_latency = list() - osd_commit_latency = list() + osd_apply_latency_ns = list() + osd_commit_latency_ns = list() osd_stats = self.get('osd_stats') for osd in osd_stats['osd_stats']: if osd['kb'] == 0: continue osd_fill.append((float(osd['kb_used']) / float(osd['kb'])) * 100) - osd_apply_latency.append(osd['perf_stat']['apply_latency_ms']) - osd_commit_latency.append(osd['perf_stat']['commit_latency_ms']) + osd_apply_latency_ns.append(osd['perf_stat']['apply_latency_ns']) + osd_commit_latency_ns.append(osd['perf_stat']['commit_latency_ns']) try: data['osd_max_fill'] = max(osd_fill) @@ -189,13 +189,13 @@ class Module(MgrModule): pass try: - data['osd_latency_apply_max'] = max(osd_apply_latency) - data['osd_latency_apply_min'] = min(osd_apply_latency) - data['osd_latency_apply_avg'] = avg(osd_apply_latency) + data['osd_latency_apply_max'] = max(osd_apply_latency_ns) / 1000000.0 # ns -> ms + data['osd_latency_apply_min'] = min(osd_apply_latency_ns) / 1000000.0 # ns -> ms + data['osd_latency_apply_avg'] = avg(osd_apply_latency_ns) / 1000000.0 # ns -> ms - data['osd_latency_commit_max'] = max(osd_commit_latency) - data['osd_latency_commit_min'] = min(osd_commit_latency) - data['osd_latency_commit_avg'] = avg(osd_commit_latency) + data['osd_latency_commit_max'] = max(osd_commit_latency_ns) / 1000000.0 # ns -> ms + data['osd_latency_commit_min'] = min(osd_commit_latency_ns) / 1000000.0 # ns -> ms + data['osd_latency_commit_avg'] = avg(osd_commit_latency_ns) / 1000000.0 # ns -> ms except ValueError: pass -- 2.39.5