From: Коренберг Марк Date: Wed, 29 Nov 2017 09:43:30 +0000 (+0500) Subject: mgr: increase time resolution of Commit/Apply OSD latencies. X-Git-Tag: v13.0.2~305^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3b75db80d851d21fc57054ef7b045ea46b4f2223;p=ceph.git mgr: increase time resolution of Commit/Apply OSD latencies. Increase precision/resolution of time measurements in performance monitoring. Affects only Commit/Apply OSD latencies. Signed-off-by: Коренберг Марк --- diff --git a/src/common/perf_counters.cc b/src/common/perf_counters.cc index 737bbcc3dc85..922b47372e48 100644 --- a/src/common/perf_counters.cc +++ b/src/common/perf_counters.cc @@ -312,7 +312,7 @@ void PerfCounters::hinc(int idx, int64_t x, int64_t y) data.histogram->inc(x, y); } -pair PerfCounters::get_tavg_ms(int idx) const +pair PerfCounters::get_tavg_ns(int idx) const { if (!m_cct->_conf->perf) return make_pair(0, 0); @@ -325,7 +325,7 @@ pair PerfCounters::get_tavg_ms(int idx) const if (!(data.type & PERFCOUNTER_LONGRUNAVG)) return make_pair(0, 0); pair a = data.read_avg(); - return make_pair(a.second, a.first / 1000000ull); + return make_pair(a.second, a.first); } void PerfCounters::reset() diff --git a/src/common/perf_counters.h b/src/common/perf_counters.h index 1aec9cc05f58..4f1a50887d49 100644 --- a/src/common/perf_counters.h +++ b/src/common/perf_counters.h @@ -241,7 +241,7 @@ public: const std::string &counter = "") { dump_formatted_generic(f, schema, true, counter); } - pair get_tavg_ms(int idx) const; + pair get_tavg_ns(int idx) const; const std::string& get_name() const; void set_name(std::string s) { diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 884e99d11f40..841af9c56b96 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -1866,8 +1866,8 @@ void PGMap::print_osd_perf_stats(std::ostream *ss) const i != osd_stat.end(); ++i) { tab << i->first; - tab << i->second.os_perf_stat.os_commit_latency; - tab << i->second.os_perf_stat.os_apply_latency; + tab << i->second.os_perf_stat.os_commit_latency_ns / 1000000ull; + tab << i->second.os_perf_stat.os_apply_latency_ns / 1000000ull; tab << TextTable::endrow; } (*ss) << tab; diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b65d5cf3d592..0a379ff7a050 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -8269,11 +8269,11 @@ void BlueStore::_txc_write_nodes(TransContext *txc, KeyValueDB::Transaction t) void BlueStore::BSPerfTracker::update_from_perfcounters( PerfCounters &logger) { - os_commit_latency.consume_next( - logger.get_tavg_ms( + os_commit_latency_ns.consume_next( + logger.get_tavg_ns( l_bluestore_commit_lat)); - os_apply_latency.consume_next( - logger.get_tavg_ms( + os_apply_latency_ns.consume_next( + logger.get_tavg_ns( l_bluestore_commit_lat)); } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 32f4cde3d9cd..a30c4e61ea65 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2387,13 +2387,13 @@ public: } struct BSPerfTracker { - PerfCounters::avg_tracker os_commit_latency; - PerfCounters::avg_tracker os_apply_latency; + PerfCounters::avg_tracker os_commit_latency_ns; + PerfCounters::avg_tracker os_apply_latency_ns; objectstore_perf_stat_t get_cur_stats() const { objectstore_perf_stat_t ret; - ret.os_commit_latency = os_commit_latency.current_avg(); - ret.os_apply_latency = os_apply_latency.current_avg(); + ret.os_commit_latency_ns = os_commit_latency_ns.current_avg(); + ret.os_apply_latency_ns = os_apply_latency_ns.current_avg(); return ret; } diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc index 33faa2371f85..55fc10b8dd83 100644 --- a/src/os/filestore/FileStore.cc +++ b/src/os/filestore/FileStore.cc @@ -145,11 +145,11 @@ int FileStore::get_block_device_fsid(CephContext* cct, const string& path, void FileStore::FSPerfTracker::update_from_perfcounters( PerfCounters &logger) { - os_commit_latency.consume_next( - logger.get_tavg_ms( + os_commit_latency_ns.consume_next( + logger.get_tavg_ns( l_filestore_journal_latency)); - os_apply_latency.consume_next( - logger.get_tavg_ms( + os_apply_latency_ns.consume_next( + logger.get_tavg_ns( l_filestore_apply_latency)); } diff --git a/src/os/filestore/FileStore.h b/src/os/filestore/FileStore.h index d5711df039ea..002bbf9b3146 100644 --- a/src/os/filestore/FileStore.h +++ b/src/os/filestore/FileStore.h @@ -129,13 +129,13 @@ public: static int get_block_device_fsid(CephContext* cct, const string& path, uuid_d *fsid); struct FSPerfTracker { - PerfCounters::avg_tracker os_commit_latency; - PerfCounters::avg_tracker os_apply_latency; + PerfCounters::avg_tracker os_commit_latency_ns; + PerfCounters::avg_tracker os_apply_latency_ns; objectstore_perf_stat_t get_cur_stats() const { objectstore_perf_stat_t ret; - ret.os_commit_latency = os_commit_latency.current_avg(); - ret.os_apply_latency = os_apply_latency.current_avg(); + ret.os_commit_latency_ns = os_commit_latency_ns.current_avg(); + ret.os_apply_latency_ns = os_apply_latency_ns.current_avg(); return ret; } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 42864574bd11..b5219f569b62 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -276,23 +276,39 @@ void request_redirect_t::generate_test_instances(list& o) void objectstore_perf_stat_t::dump(Formatter *f) const { - f->dump_unsigned("commit_latency_ms", os_commit_latency); - f->dump_unsigned("apply_latency_ms", os_apply_latency); + // *_ms values just for compatibility. + f->dump_float("commit_latency_ms", os_commit_latency_ns / 1000000.0); + f->dump_float("apply_latency_ms", os_apply_latency_ns / 1000000.0); + f->dump_unsigned("commit_latency_ns", os_commit_latency_ns); + f->dump_unsigned("apply_latency_ns", os_apply_latency_ns); } void objectstore_perf_stat_t::encode(bufferlist &bl) const { - ENCODE_START(1, 1, bl); - encode(os_commit_latency, bl); - encode(os_apply_latency, bl); + uint32_t commit_latency_ms = os_commit_latency_ns / 1000000; + uint32_t apply_latency_ms = os_apply_latency_ns / 1000000; + ENCODE_START(2, 1, bl); + encode(commit_latency_ms, bl); // for compatibility with older monitor. + encode(apply_latency_ms, bl); // for compatibility with older monitor. + encode(os_commit_latency_ns, bl); + encode(os_apply_latency_ns, bl); ENCODE_FINISH(bl); } void objectstore_perf_stat_t::decode(bufferlist::iterator &bl) { - DECODE_START(1, bl); - decode(os_commit_latency, bl); - decode(os_apply_latency, bl); + DECODE_START(2, bl); + uint32_t commit_latency_ms; + uint32_t apply_latency_ms; + decode(commit_latency_ms, bl); + decode(apply_latency_ms, bl); + if (struct_v >= 2) { + decode(os_commit_latency_ns, bl); + decode(os_apply_latency_ns, bl); + } else { + os_commit_latency_ns = commit_latency_ms * (uint64_t) 1000000; + os_apply_latency_ns = apply_latency_ms * (uint64_t) 1000000; + } DECODE_FINISH(bl); } @@ -300,8 +316,8 @@ void objectstore_perf_stat_t::generate_test_instances(std::listos_commit_latency = 20; - o.back()->os_apply_latency = 30; + o.back()->os_commit_latency_ns = 20000000; + o.back()->os_apply_latency_ns = 30000000; } // -- osd_stat_t -- diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index becea7d25459..5c1f51115980 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -883,25 +883,25 @@ inline ostream& operator<<(ostream& out, const eversion_t& e) { * current perf information about the osd */ struct objectstore_perf_stat_t { - // cur_op_latency is in ms since double add/sub are not associative - uint32_t os_commit_latency; - uint32_t os_apply_latency; + // cur_op_latency is in ns since double add/sub are not associative + uint64_t os_commit_latency_ns; + uint64_t os_apply_latency_ns; objectstore_perf_stat_t() : - os_commit_latency(0), os_apply_latency(0) {} + os_commit_latency_ns(0), os_apply_latency_ns(0) {} bool operator==(const objectstore_perf_stat_t &r) const { - return os_commit_latency == r.os_commit_latency && - os_apply_latency == r.os_apply_latency; + return os_commit_latency_ns == r.os_commit_latency_ns && + os_apply_latency_ns == r.os_apply_latency_ns; } void add(const objectstore_perf_stat_t &o) { - os_commit_latency += o.os_commit_latency; - os_apply_latency += o.os_apply_latency; + os_commit_latency_ns += o.os_commit_latency_ns; + os_apply_latency_ns += o.os_apply_latency_ns; } void sub(const objectstore_perf_stat_t &o) { - os_commit_latency -= o.os_commit_latency; - os_apply_latency -= o.os_apply_latency; + os_commit_latency_ns -= o.os_commit_latency_ns; + os_apply_latency_ns -= o.os_apply_latency_ns; } void dump(Formatter *f) const; void encode(bufferlist &bl) const; diff --git a/src/pybind/mgr/zabbix/module.py b/src/pybind/mgr/zabbix/module.py index f95db56c17c8..bd0ee1845e79 100644 --- a/src/pybind/mgr/zabbix/module.py +++ b/src/pybind/mgr/zabbix/module.py @@ -170,16 +170,16 @@ class Module(MgrModule): data['num_osd_in'] = num_in osd_fill = list() - osd_apply_latency = list() - osd_commit_latency = list() + osd_apply_latency_ns = list() + osd_commit_latency_ns = list() osd_stats = self.get('osd_stats') for osd in osd_stats['osd_stats']: if osd['kb'] == 0: continue osd_fill.append((float(osd['kb_used']) / float(osd['kb'])) * 100) - osd_apply_latency.append(osd['perf_stat']['apply_latency_ms']) - osd_commit_latency.append(osd['perf_stat']['commit_latency_ms']) + osd_apply_latency_ns.append(osd['perf_stat']['apply_latency_ns']) + osd_commit_latency_ns.append(osd['perf_stat']['commit_latency_ns']) try: data['osd_max_fill'] = max(osd_fill) @@ -189,13 +189,13 @@ class Module(MgrModule): pass try: - data['osd_latency_apply_max'] = max(osd_apply_latency) - data['osd_latency_apply_min'] = min(osd_apply_latency) - data['osd_latency_apply_avg'] = avg(osd_apply_latency) + data['osd_latency_apply_max'] = max(osd_apply_latency_ns) / 1000000.0 # ns -> ms + data['osd_latency_apply_min'] = min(osd_apply_latency_ns) / 1000000.0 # ns -> ms + data['osd_latency_apply_avg'] = avg(osd_apply_latency_ns) / 1000000.0 # ns -> ms - data['osd_latency_commit_max'] = max(osd_commit_latency) - data['osd_latency_commit_min'] = min(osd_commit_latency) - data['osd_latency_commit_avg'] = avg(osd_commit_latency) + data['osd_latency_commit_max'] = max(osd_commit_latency_ns) / 1000000.0 # ns -> ms + data['osd_latency_commit_min'] = min(osd_commit_latency_ns) / 1000000.0 # ns -> ms + data['osd_latency_commit_avg'] = avg(osd_commit_latency_ns) / 1000000.0 # ns -> ms except ValueError: pass