]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr: increase time resolution of Commit/Apply OSD latencies. 19232/head
authorКоренберг Марк <mark@ideco.ru>
Wed, 29 Nov 2017 09:43:30 +0000 (14:43 +0500)
committerКоренберг Марк <mark@ideco.ru>
Wed, 24 Jan 2018 08:29:13 +0000 (13:29 +0500)
Increase precision/resolution of time measurements in performance
monitoring. Affects only Commit/Apply OSD latencies.

Signed-off-by: Коренберг Марк <socketpair@gmail.com>
src/common/perf_counters.cc
src/common/perf_counters.h
src/mon/PGMap.cc
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h
src/os/filestore/FileStore.cc
src/os/filestore/FileStore.h
src/osd/osd_types.cc
src/osd/osd_types.h
src/pybind/mgr/zabbix/module.py

index 737bbcc3dc85153e0b78bed405933476a8e26a3f..922b47372e489dc3d6b3311811be0ed99fedb808 100644 (file)
@@ -312,7 +312,7 @@ void PerfCounters::hinc(int idx, int64_t x, int64_t y)
   data.histogram->inc(x, y);
 }
 
-pair<uint64_t, uint64_t> PerfCounters::get_tavg_ms(int idx) const
+pair<uint64_t, uint64_t> PerfCounters::get_tavg_ns(int idx) const
 {
   if (!m_cct->_conf->perf)
     return make_pair(0, 0);
@@ -325,7 +325,7 @@ pair<uint64_t, uint64_t> PerfCounters::get_tavg_ms(int idx) const
   if (!(data.type & PERFCOUNTER_LONGRUNAVG))
     return make_pair(0, 0);
   pair<uint64_t,uint64_t> a = data.read_avg();
-  return make_pair(a.second, a.first / 1000000ull);
+  return make_pair(a.second, a.first);
 }
 
 void PerfCounters::reset()
index 1aec9cc05f58801d5b2c606bb36f9d94485da159..4f1a50887d49c05fa545e7f5a9aa79b41567adb0 100644 (file)
@@ -241,7 +241,7 @@ public:
                                  const std::string &counter = "") {
     dump_formatted_generic(f, schema, true, counter);
   }
-  pair<uint64_t, uint64_t> get_tavg_ms(int idx) const;
+  pair<uint64_t, uint64_t> get_tavg_ns(int idx) const;
 
   const std::string& get_name() const;
   void set_name(std::string s) {
index 884e99d11f4028333b459a9eb10126dbf22c7f1f..841af9c56b968f0cbd0525bdc98ec4ecf05a723a 100644 (file)
@@ -1866,8 +1866,8 @@ void PGMap::print_osd_perf_stats(std::ostream *ss) const
        i != osd_stat.end();
        ++i) {
     tab << i->first;
-    tab << i->second.os_perf_stat.os_commit_latency;
-    tab << i->second.os_perf_stat.os_apply_latency;
+    tab << i->second.os_perf_stat.os_commit_latency_ns / 1000000ull;
+    tab << i->second.os_perf_stat.os_apply_latency_ns / 1000000ull;
     tab << TextTable::endrow;
   }
   (*ss) << tab;
index b65d5cf3d5929e1b7f2ef1cb1e3fe06eb85c94c7..0a379ff7a0504688e7146e6d6689861f7042d127 100644 (file)
@@ -8269,11 +8269,11 @@ void BlueStore::_txc_write_nodes(TransContext *txc, KeyValueDB::Transaction t)
 void BlueStore::BSPerfTracker::update_from_perfcounters(
   PerfCounters &logger)
 {
-  os_commit_latency.consume_next(
-    logger.get_tavg_ms(
+  os_commit_latency_ns.consume_next(
+    logger.get_tavg_ns(
       l_bluestore_commit_lat));
-  os_apply_latency.consume_next(
-    logger.get_tavg_ms(
+  os_apply_latency_ns.consume_next(
+    logger.get_tavg_ns(
       l_bluestore_commit_lat));
 }
 
index 32f4cde3d9cd34f9f224f270e79a2fe42f723cd7..a30c4e61ea652b415d4b0bfbc1bd1d2c0b456a7d 100644 (file)
@@ -2387,13 +2387,13 @@ public:
   }
 
   struct BSPerfTracker {
-    PerfCounters::avg_tracker<uint64_t> os_commit_latency;
-    PerfCounters::avg_tracker<uint64_t> os_apply_latency;
+    PerfCounters::avg_tracker<uint64_t> os_commit_latency_ns;
+    PerfCounters::avg_tracker<uint64_t> os_apply_latency_ns;
 
     objectstore_perf_stat_t get_cur_stats() const {
       objectstore_perf_stat_t ret;
-      ret.os_commit_latency = os_commit_latency.current_avg();
-      ret.os_apply_latency = os_apply_latency.current_avg();
+      ret.os_commit_latency_ns = os_commit_latency_ns.current_avg();
+      ret.os_apply_latency_ns = os_apply_latency_ns.current_avg();
       return ret;
     }
 
index 33faa2371f85e07fe9351625556749597a194c37..55fc10b8dd8369a11b2b148405d7368b3e14d63f 100644 (file)
@@ -145,11 +145,11 @@ int FileStore::get_block_device_fsid(CephContext* cct, const string& path,
 void FileStore::FSPerfTracker::update_from_perfcounters(
   PerfCounters &logger)
 {
-  os_commit_latency.consume_next(
-    logger.get_tavg_ms(
+  os_commit_latency_ns.consume_next(
+    logger.get_tavg_ns(
       l_filestore_journal_latency));
-  os_apply_latency.consume_next(
-    logger.get_tavg_ms(
+  os_apply_latency_ns.consume_next(
+    logger.get_tavg_ns(
       l_filestore_apply_latency));
 }
 
index d5711df039ea58e1629ce61932b1d301109667ab..002bbf9b314663b7d2efdcc4c512d8738b1021fc 100644 (file)
@@ -129,13 +129,13 @@ public:
   static int get_block_device_fsid(CephContext* cct, const string& path,
                                   uuid_d *fsid);
   struct FSPerfTracker {
-    PerfCounters::avg_tracker<uint64_t> os_commit_latency;
-    PerfCounters::avg_tracker<uint64_t> os_apply_latency;
+    PerfCounters::avg_tracker<uint64_t> os_commit_latency_ns;
+    PerfCounters::avg_tracker<uint64_t> os_apply_latency_ns;
 
     objectstore_perf_stat_t get_cur_stats() const {
       objectstore_perf_stat_t ret;
-      ret.os_commit_latency = os_commit_latency.current_avg();
-      ret.os_apply_latency = os_apply_latency.current_avg();
+      ret.os_commit_latency_ns = os_commit_latency_ns.current_avg();
+      ret.os_apply_latency_ns = os_apply_latency_ns.current_avg();
       return ret;
     }
 
index 42864574bd1119f197cdaaeb308a56ce9d7b05a2..b5219f569b624fbe8ea2f1a0a2b1bfbc0114bb22 100644 (file)
@@ -276,23 +276,39 @@ void request_redirect_t::generate_test_instances(list<request_redirect_t*>& o)
 
 void objectstore_perf_stat_t::dump(Formatter *f) const
 {
-  f->dump_unsigned("commit_latency_ms", os_commit_latency);
-  f->dump_unsigned("apply_latency_ms", os_apply_latency);
+  // *_ms values just for compatibility.
+  f->dump_float("commit_latency_ms", os_commit_latency_ns / 1000000.0);
+  f->dump_float("apply_latency_ms", os_apply_latency_ns / 1000000.0);
+  f->dump_unsigned("commit_latency_ns", os_commit_latency_ns);
+  f->dump_unsigned("apply_latency_ns", os_apply_latency_ns);
 }
 
 void objectstore_perf_stat_t::encode(bufferlist &bl) const
 {
-  ENCODE_START(1, 1, bl);
-  encode(os_commit_latency, bl);
-  encode(os_apply_latency, bl);
+  uint32_t commit_latency_ms = os_commit_latency_ns / 1000000;
+  uint32_t apply_latency_ms = os_apply_latency_ns / 1000000;
+  ENCODE_START(2, 1, bl);
+  encode(commit_latency_ms, bl); // for compatibility with older monitor.
+  encode(apply_latency_ms, bl); // for compatibility with older monitor.
+  encode(os_commit_latency_ns, bl);
+  encode(os_apply_latency_ns, bl);
   ENCODE_FINISH(bl);
 }
 
 void objectstore_perf_stat_t::decode(bufferlist::iterator &bl)
 {
-  DECODE_START(1, bl);
-  decode(os_commit_latency, bl);
-  decode(os_apply_latency, bl);
+  DECODE_START(2, bl);
+  uint32_t commit_latency_ms;
+  uint32_t apply_latency_ms;
+  decode(commit_latency_ms, bl);
+  decode(apply_latency_ms, bl);
+  if (struct_v >= 2) {
+    decode(os_commit_latency_ns, bl);
+    decode(os_apply_latency_ns, bl);
+  } else {
+    os_commit_latency_ns = commit_latency_ms * (uint64_t) 1000000;
+    os_apply_latency_ns = apply_latency_ms * (uint64_t) 1000000;
+  }
   DECODE_FINISH(bl);
 }
 
@@ -300,8 +316,8 @@ void objectstore_perf_stat_t::generate_test_instances(std::list<objectstore_perf
 {
   o.push_back(new objectstore_perf_stat_t());
   o.push_back(new objectstore_perf_stat_t());
-  o.back()->os_commit_latency = 20;
-  o.back()->os_apply_latency = 30;
+  o.back()->os_commit_latency_ns = 20000000;
+  o.back()->os_apply_latency_ns = 30000000;
 }
 
 // -- osd_stat_t --
index becea7d254593cf8e63e08cb4f20520593b01e75..5c1f51115980e7b489cf1d461e4db2365d902ade 100644 (file)
@@ -883,25 +883,25 @@ inline ostream& operator<<(ostream& out, const eversion_t& e) {
  * current perf information about the osd
  */
 struct objectstore_perf_stat_t {
-  // cur_op_latency is in ms since double add/sub are not associative
-  uint32_t os_commit_latency;
-  uint32_t os_apply_latency;
+  // cur_op_latency is in ns since double add/sub are not associative
+  uint64_t os_commit_latency_ns;
+  uint64_t os_apply_latency_ns;
 
   objectstore_perf_stat_t() :
-    os_commit_latency(0), os_apply_latency(0) {}
+    os_commit_latency_ns(0), os_apply_latency_ns(0) {}
 
   bool operator==(const objectstore_perf_stat_t &r) const {
-    return os_commit_latency == r.os_commit_latency &&
-      os_apply_latency == r.os_apply_latency;
+    return os_commit_latency_ns == r.os_commit_latency_ns &&
+      os_apply_latency_ns == r.os_apply_latency_ns;
   }
 
   void add(const objectstore_perf_stat_t &o) {
-    os_commit_latency += o.os_commit_latency;
-    os_apply_latency += o.os_apply_latency;
+    os_commit_latency_ns += o.os_commit_latency_ns;
+    os_apply_latency_ns += o.os_apply_latency_ns;
   }
   void sub(const objectstore_perf_stat_t &o) {
-    os_commit_latency -= o.os_commit_latency;
-    os_apply_latency -= o.os_apply_latency;
+    os_commit_latency_ns -= o.os_commit_latency_ns;
+    os_apply_latency_ns -= o.os_apply_latency_ns;
   }
   void dump(Formatter *f) const;
   void encode(bufferlist &bl) const;
index f95db56c17c85be38ec68694131c45ba6eb38054..bd0ee1845e793d76ef7d973207e4a529ea147c62 100644 (file)
@@ -170,16 +170,16 @@ class Module(MgrModule):
         data['num_osd_in'] = num_in
 
         osd_fill = list()
-        osd_apply_latency = list()
-        osd_commit_latency = list()
+        osd_apply_latency_ns = list()
+        osd_commit_latency_ns = list()
 
         osd_stats = self.get('osd_stats')
         for osd in osd_stats['osd_stats']:
             if osd['kb'] == 0:
                 continue
             osd_fill.append((float(osd['kb_used']) / float(osd['kb'])) * 100)
-            osd_apply_latency.append(osd['perf_stat']['apply_latency_ms'])
-            osd_commit_latency.append(osd['perf_stat']['commit_latency_ms'])
+            osd_apply_latency_ns.append(osd['perf_stat']['apply_latency_ns'])
+            osd_commit_latency_ns.append(osd['perf_stat']['commit_latency_ns'])
 
         try:
             data['osd_max_fill'] = max(osd_fill)
@@ -189,13 +189,13 @@ class Module(MgrModule):
             pass
 
         try:
-            data['osd_latency_apply_max'] = max(osd_apply_latency)
-            data['osd_latency_apply_min'] = min(osd_apply_latency)
-            data['osd_latency_apply_avg'] = avg(osd_apply_latency)
+            data['osd_latency_apply_max'] = max(osd_apply_latency_ns) / 1000000.0 # ns -> ms
+            data['osd_latency_apply_min'] = min(osd_apply_latency_ns) / 1000000.0 # ns -> ms
+            data['osd_latency_apply_avg'] = avg(osd_apply_latency_ns) / 1000000.0 # ns -> ms
 
-            data['osd_latency_commit_max'] = max(osd_commit_latency)
-            data['osd_latency_commit_min'] = min(osd_commit_latency)
-            data['osd_latency_commit_avg'] = avg(osd_commit_latency)
+            data['osd_latency_commit_max'] = max(osd_commit_latency_ns) / 1000000.0 # ns -> ms
+            data['osd_latency_commit_min'] = min(osd_commit_latency_ns) / 1000000.0 # ns -> ms
+            data['osd_latency_commit_avg'] = avg(osd_commit_latency_ns) / 1000000.0 # ns -> ms
         except ValueError:
             pass