From: Bartłomiej Święcki Date: Wed, 1 Feb 2017 09:03:06 +0000 (+0100) Subject: common, osd, tools: Add histograms to performance counters X-Git-Tag: v12.0.1~468^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=503402413c6a0039a083a1d2511bdff434c1dd37;p=ceph.git common, osd, tools: Add histograms to performance counters This change adds new performance counter type: histogram. Currently it does measure latency + request size (in bytes) of an op similarly to l_osd_op_(r|w|rw)_lat and l_osd_op_(r|w|rw)_(inb|outb). Histograms are 2-dimensional and gather probes for both values at the same time. This allows a more detailed analysis than what could be done with two separate 1-D histograms. Still the memory footprint is negligible. Since new data could break existing tools and the amount of data dumped is rather large, two new admin socket commands are introduced: perf histogram schema and perf histogram dump. Invocation of original command should remain compatible. There's also a new simple tool in src/tools/histogram_dump.py which does a live dump of one histogram in local daemon. Current configuration of histograms is hard-coded and should cover all common cases. If it turns out to be useful, in the future it could be loaded from the configuration. Signed-off-by: Bartłomiej Święcki --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d7cf3a2b57c1..3a4d3b098fd3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -392,6 +392,7 @@ set(libcommon_files common/PrebufferedStreambuf.cc common/BackTrace.cc common/perf_counters.cc + common/perf_histogram.cc common/mutex_debug.cc common/Mutex.cc common/OutputDataSocket.cc diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc index 82d3825113eb..bf47a224696e 100644 --- a/src/common/ceph_context.cc +++ b/src/common/ceph_context.cc @@ -4,6 +4,7 @@ * Ceph - scalable distributed file system * * Copyright (C) 2011 New Dream Network + * Copyright (C) 2017 OVH * * This is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -393,6 +394,17 @@ void CephContext::do_command(std::string command, cmdmap_t& cmdmap, command == "perf schema") { _perf_counters_collection->dump_formatted(f, true); } + else if (command == "perf histogram dump") { + std::string logger; + std::string counter; + cmd_getval(this, cmdmap, "logger", logger); + cmd_getval(this, cmdmap, "counter", counter); + _perf_counters_collection->dump_formatted_histograms(f, false, logger, + counter); + } + else if (command == "perf histogram schema") { + _perf_counters_collection->dump_formatted_histograms(f, true); + } else if (command == "perf reset") { std::string var; string section = command; @@ -555,8 +567,10 @@ CephContext::CephContext(uint32_t module_type_, int init_flags_) _admin_socket->register_command("1", "1", _admin_hook, ""); _admin_socket->register_command("perf dump", "perf dump name=logger,type=CephString,req=false name=counter,type=CephString,req=false", _admin_hook, "dump perfcounters value"); _admin_socket->register_command("perfcounters_schema", "perfcounters_schema", _admin_hook, ""); + _admin_socket->register_command("perf histogram dump", "perf histogram dump name=logger,type=CephString,req=false name=counter,type=CephString,req=false", _admin_hook, "dump perf histogram values"); _admin_socket->register_command("2", "2", _admin_hook, ""); _admin_socket->register_command("perf schema", "perf schema", _admin_hook, "dump perfcounters schema"); + _admin_socket->register_command("perf histogram schema", "perf histogram schema", _admin_hook, "dump perf histogram schema"); _admin_socket->register_command("perf reset", "perf reset name=var,type=CephString", _admin_hook, "perf reset : perf reset all or one perfcounter name"); _admin_socket->register_command("config show", "config show", _admin_hook, "dump current config settings"); _admin_socket->register_command("config set", "config set name=var,type=CephString name=val,type=CephString,n=N", _admin_hook, "config set [ ...]: set a config variable"); diff --git a/src/common/perf_counters.cc b/src/common/perf_counters.cc index 839bbb443c61..8f4e702419d8 100644 --- a/src/common/perf_counters.cc +++ b/src/common/perf_counters.cc @@ -4,6 +4,7 @@ * Ceph - scalable distributed file system * * Copyright (C) 2011 New Dream Network + * Copyright (C) 2017 OVH * * This is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -135,10 +136,13 @@ bool PerfCountersCollection::reset(const std::string &name) * @param counter name of counter within subsystem, e.g. "num_strays", * may be empty. * @param schema if true, output schema instead of current data. + * @param histograms if true, dump histogram values, + * if false dump all non-histogram counters */ -void PerfCountersCollection::dump_formatted( +void PerfCountersCollection::dump_formatted_generic( Formatter *f, bool schema, + bool histograms, const std::string &logger, const std::string &counter) { @@ -149,7 +153,7 @@ void PerfCountersCollection::dump_formatted( l != m_loggers.end(); ++l) { // Optionally filter on logger name, pass through counter filter if (logger.empty() || (*l)->get_name() == logger) { - (*l)->dump_formatted(f, schema, counter); + (*l)->dump_formatted_generic(f, schema, histograms, counter); } } f->close_section(); @@ -304,6 +308,21 @@ utime_t PerfCounters::tget(int idx) const return utime_t(v / 1000000000ull, v % 1000000000ull); } +void PerfCounters::hinc(int idx, int64_t x, int64_t y) +{ + if (!m_cct->_conf->perf) + return; + + assert(idx > m_lower_bound); + assert(idx < m_upper_bound); + + perf_counter_data_any_d& data(m_data[idx - m_lower_bound - 1]); + assert(data.type == (PERFCOUNTER_HISTOGRAM | PERFCOUNTER_U64)); + assert(data.histogram); + + data.histogram->inc(x, y); +} + pair PerfCounters::get_tavg_ms(int idx) const { if (!m_cct->_conf->perf) @@ -331,8 +350,8 @@ void PerfCounters::reset() } } -void PerfCounters::dump_formatted(Formatter *f, bool schema, - const std::string &counter) +void PerfCounters::dump_formatted_generic(Formatter *f, bool schema, + bool histograms, const std::string &counter) { f->open_object_section(m_name.c_str()); @@ -343,6 +362,12 @@ void PerfCounters::dump_formatted(Formatter *f, bool schema, continue; } + // Switch between normal and histogram view + bool is_histogram = (d->type & PERFCOUNTER_HISTOGRAM) != 0; + if (is_histogram != histograms) { + continue; + } + if (schema) { f->open_object_section(d->name); f->dump_int("type", d->type); @@ -375,6 +400,12 @@ void PerfCounters::dump_formatted(Formatter *f, bool schema, ceph_abort(); } f->close_section(); + } else if (d->type & PERFCOUNTER_HISTOGRAM) { + assert(d->type == (PERFCOUNTER_HISTOGRAM | PERFCOUNTER_U64)); + assert(d->histogram); + f->open_object_section(d->name); + d->histogram->dump_formatted(f); + f->close_section(); } else { uint64_t v = d->u64.read(); if (d->type & PERFCOUNTER_U64) { @@ -452,8 +483,18 @@ void PerfCountersBuilder::add_time_avg(int idx, const char *name, add_impl(idx, name, description, nick, PERFCOUNTER_TIME | PERFCOUNTER_LONGRUNAVG); } +void PerfCountersBuilder::add_histogram(int idx, const char *name, + PerfHistogramCommon::axis_config_d x_axis_config, + PerfHistogramCommon::axis_config_d y_axis_config, + const char *description, const char *nick) +{ + add_impl(idx, name, description, nick, PERFCOUNTER_U64 | PERFCOUNTER_HISTOGRAM, + unique_ptr>{new PerfHistogram<>{x_axis_config, y_axis_config}}); +} + void PerfCountersBuilder::add_impl(int idx, const char *name, - const char *description, const char *nick, int ty) + const char *description, const char *nick, int ty, + unique_ptr> histogram) { assert(idx > m_perf_counters->m_lower_bound); assert(idx < m_perf_counters->m_upper_bound); @@ -465,6 +506,7 @@ void PerfCountersBuilder::add_impl(int idx, const char *name, data.description = description; data.nick = nick; data.type = (enum perfcounter_type_d)ty; + data.histogram = std::move(histogram); } PerfCounters *PerfCountersBuilder::create_perf_counters() diff --git a/src/common/perf_counters.h b/src/common/perf_counters.h index 098555d564b7..1556ee582cf6 100644 --- a/src/common/perf_counters.h +++ b/src/common/perf_counters.h @@ -4,6 +4,7 @@ * Ceph - scalable distributed file system * * Copyright (C) 2011 New Dream Network + * Copyright (C) 2017 OVH * * This is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -17,6 +18,7 @@ #define CEPH_COMMON_PERF_COUNTERS_H #include "common/config_obs.h" +#include "common/perf_histogram.h" #include "common/Mutex.h" #include "include/utime.h" @@ -27,6 +29,7 @@ #include #include #include +#include class CephContext; class PerfCountersBuilder; @@ -38,8 +41,10 @@ enum perfcounter_type_d : uint8_t PERFCOUNTER_U64 = 0x2, PERFCOUNTER_LONGRUNAVG = 0x4, PERFCOUNTER_COUNTER = 0x8, + PERFCOUNTER_HISTOGRAM = 0x10, }; + /* * A PerfCounters object is usually associated with a single subsystem. * It contains counters which we modify to track performance and throughput @@ -49,11 +54,12 @@ enum perfcounter_type_d : uint8_t * 1) integer values & counters * 2) floating-point values & counters * 3) floating-point averages + * 4) 2D histograms of quantized value pairs * - * The difference between values and counters is in how they are initialized + * The difference between values, counters and histograms is in how they are initialized * and accessed. For a counter, use the inc(counter, amount) function (note * that amount defaults to 1 if you don't set it). For a value, use the - * set(index, value) function. + * set(index, value) function. For histogram use the hinc(value1, value2) function. * (For time, use the tinc and tset variants.) * * If for some reason you would like to reset your counters, you can do so using @@ -88,6 +94,9 @@ public: u64.set(a.first); avgcount.set(a.second); avgcount2.set(a.second); + if (other.histogram) { + histogram.reset(new PerfHistogram<>(*other.histogram)); + } } const char *name; @@ -97,6 +106,7 @@ public: atomic64_t u64; atomic64_t avgcount; atomic64_t avgcount2; + std::unique_ptr> histogram; void reset() { @@ -105,18 +115,9 @@ public: avgcount.set(0); avgcount2.set(0); } - } - - perf_counter_data_any_d& operator=(const perf_counter_data_any_d& other) { - name = other.name; - description = other.description; - nick = other.nick; - type = other.type; - pair a = other.read_avg(); - u64.set(a.first); - avgcount.set(a.second); - avgcount2.set(a.second); - return *this; + if (histogram) { + histogram->reset(); + } } /// read safely @@ -160,9 +161,17 @@ public: void tinc(int idx, ceph::timespan v); utime_t tget(int idx) const; + void hinc(int idx, int64_t x, int64_t y); + void reset(); void dump_formatted(ceph::Formatter *f, bool schema, - const std::string &counter = ""); + const std::string &counter = "") { + dump_formatted_generic(f, schema, false, counter); + } + void dump_formatted_histograms(ceph::Formatter *f, bool schema, + const std::string &counter = "") { + dump_formatted_generic(f, schema, true, counter); + } pair get_tavg_ms(int idx) const; const std::string& get_name() const; @@ -175,6 +184,8 @@ private: int lower_bound, int upper_bound); PerfCounters(const PerfCounters &rhs); PerfCounters& operator=(const PerfCounters &rhs); + void dump_formatted_generic(ceph::Formatter *f, bool schema, bool histograms, + const std::string &counter = ""); typedef std::vector perf_counter_data_vec_t; @@ -214,11 +225,18 @@ public: void remove(class PerfCounters *l); void clear(); bool reset(const std::string &name); - void dump_formatted( - ceph::Formatter *f, - bool schema, - const std::string &logger = "", - const std::string &counter = ""); + + void dump_formatted(ceph::Formatter *f, bool schema, + const std::string &logger = "", + const std::string &counter = "") { + dump_formatted_generic(f, schema, false, logger, counter); + } + + void dump_formatted_histograms(ceph::Formatter *f, bool schema, + const std::string &logger = "", + const std::string &counter = "") { + dump_formatted_generic(f, schema, true, logger, counter); + } typedef std::map CounterMap; @@ -226,6 +244,10 @@ public: void with_counters(std::function) const; private: + void dump_formatted_generic(ceph::Formatter *f, bool schema, bool histograms, + const std::string &logger = "", + const std::string &counter = ""); + CephContext *m_cct; /** Protects m_loggers */ @@ -262,12 +284,17 @@ public: const char *description=NULL, const char *nick = NULL); void add_time_avg(int key, const char *name, const char *description=NULL, const char *nick = NULL); + void add_histogram(int key, const char* name, + PerfHistogramCommon::axis_config_d x_axis_config, + PerfHistogramCommon::axis_config_d y_axis_config, + const char *description=NULL, const char* nick = NULL); PerfCounters* create_perf_counters(); private: PerfCountersBuilder(const PerfCountersBuilder &rhs); PerfCountersBuilder& operator=(const PerfCountersBuilder &rhs); void add_impl(int idx, const char *name, - const char *description, const char *nick, int ty); + const char *description, const char *nick, int ty, + unique_ptr> histogram = nullptr); PerfCounters *m_perf_counters; }; diff --git a/src/common/perf_histogram.cc b/src/common/perf_histogram.cc new file mode 100644 index 000000000000..035e32672846 --- /dev/null +++ b/src/common/perf_histogram.cc @@ -0,0 +1,121 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 OVH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "common/perf_histogram.h" + +#include + +void PerfHistogramCommon::dump_formatted_axis( + ceph::Formatter *f, const PerfHistogramCommon::axis_config_d &ac) { + f->open_object_section("axis"); + + // Dump axis configuration + f->dump_string("name", ac.m_name); + f->dump_int("min", ac.m_min); + f->dump_int("quant_size", ac.m_quant_size); + f->dump_int("buckets", ac.m_buckets); + switch (ac.m_scale_type) { + case SCALE_LINEAR: + f->dump_string("scale_type", "linear"); + break; + case SCALE_LOG2: + f->dump_string("scale_type", "log2"); + break; + default: + assert(false && "Invalid scale type"); + } + + { + // Dump concrete ranges for axis buckets + f->open_array_section("ranges"); + auto ranges = get_axis_bucket_ranges(ac); + for (int i = 0; i < ac.m_buckets; ++i) { + f->open_object_section("bucket"); + if (i > 0) { + f->dump_int("min", ranges[i].first); + } + if (i < ac.m_buckets - 1) { + f->dump_int("max", ranges[i].second); + } + f->close_section(); + } + f->close_section(); + } + + f->close_section(); +} + +int64_t get_quants(int64_t i, PerfHistogramCommon::scale_type_d st) { + switch (st) { + case PerfHistogramCommon::SCALE_LINEAR: + return i; + case PerfHistogramCommon::SCALE_LOG2: + return int64_t(1) << (i - 1); + } + assert(false && "Invalid scale type"); +} + +int64_t PerfHistogramCommon::get_bucket_for_axis( + int64_t value, const PerfHistogramCommon::axis_config_d &ac) { + if (value < ac.m_min) { + return 0; + } + + value -= ac.m_min; + value /= ac.m_quant_size; + + switch (ac.m_scale_type) { + case SCALE_LINEAR: + return std::min(value + 1, ac.m_buckets - 1); + + case SCALE_LOG2: + for (int64_t i = 1; i < ac.m_buckets; ++i) { + if (value < get_quants(i, SCALE_LOG2)) { + return i; + } + } + return ac.m_buckets - 1; + } + assert(false && "Invalid scale type"); +} + +std::vector> +PerfHistogramCommon::get_axis_bucket_ranges( + const PerfHistogramCommon::axis_config_d &ac) { + std::vector> ret; + ret.resize(ac.m_buckets); + + // First bucket is for value < min + int64_t min = ac.m_min; + for (int64_t i = 1; i < ac.m_buckets - 1; i++) { + int64_t max_exclusive = + ac.m_min + get_quants(i, ac.m_scale_type) * ac.m_quant_size; + + // Dump bucket range + ret[i].first = min; + ret[i].second = max_exclusive - 1; + + // Shift min to next bucket + min = max_exclusive; + } + + // Fill up first and last element, note that in case m_buckets == 1 + // those will point to the same element, the order is important here + ret.front().second = ac.m_min - 1; + ret.back().first = min; + + ret.front().first = std::numeric_limits::min(); + ret.back().second = std::numeric_limits::max(); + return ret; +} diff --git a/src/common/perf_histogram.h b/src/common/perf_histogram.h new file mode 100644 index 000000000000..6d315905db3e --- /dev/null +++ b/src/common/perf_histogram.h @@ -0,0 +1,213 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 OVH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_COMMON_PERF_HISTOGRAM_H +#define CEPH_COMMON_PERF_HISTOGRAM_H + +#include "common/Formatter.h" +#include "include/atomic.h" +#include "include/int_types.h" + +#include + +class PerfHistogramCommon { +public: + enum scale_type_d : uint8_t { + SCALE_LINEAR = 1, + SCALE_LOG2 = 2, + }; + + struct axis_config_d { + const char *m_name; + scale_type_d m_scale_type; + int64_t m_min; + int64_t m_quant_size; + int32_t m_buckets; + }; + +protected: + /// Dump configuration of one axis to a formatter + static void dump_formatted_axis(ceph::Formatter *f, const axis_config_d &ac); + + /// Quantize given value and convert to bucket number on given axis + static int64_t get_bucket_for_axis(int64_t value, const axis_config_d &ac); + + /// Calculate inclusive ranges of axis values for each bucket on that axis + static std::vector> get_axis_bucket_ranges( + const axis_config_d &ac); +}; + +/// PerfHistogram does trace a histogram of input values. It's an extended +/// version of a standard histogram which does trace characteristics of a single +/// one value only. In this implementation, values can be traced in multiple +/// dimensions - i.e. we can create a histogram of input request size (first +/// dimension) and processing latency (second dimension). Creating standard +/// histogram out of such multidimensional one is trivial and requires summing +/// values across dimensions we're not interested in. +template +class PerfHistogram : public PerfHistogramCommon { +public: + /// Initialize new histogram object + PerfHistogram(std::initializer_list axes_config) { + assert(axes_config.size() == DIM && + "Invalid number of axis configuration objects"); + + int i = 0; + for (const auto &ac : axes_config) { + assert(ac.m_buckets > 0 && "Must have at least one bucket on axis"); + assert(ac.m_quant_size > 0 && + "Quantization unit must be non-zero positive integer value"); + + m_axes_config[i++] = ac; + } + + m_rawData.reset(new atomic64_t[get_raw_size()]); + } + + /// Copy from other histogram object + PerfHistogram(const PerfHistogram &other) + : m_axes_config(other.m_axes_config) { + int64_t size = get_raw_size(); + m_rawData.reset(new atomic64_t[size]); + for (int64_t i = 0; i < size; i++) { + m_rawData[i].set(other.m_rawData[i].read()); + } + } + + /// Set all histogram values to 0 + void reset() { + auto size = get_raw_size(); + for (auto i = size; --i >= 0;) { + m_rawData[i].set(0); + } + } + + /// Increase counter for given axis values by one + template + void inc(T... axis) { + auto index = get_raw_index_for_value(axis...); + m_rawData[index].add(1); + } + + /// Increase counter for given axis buckets by one + template + void inc_bucket(T... bucket) { + auto index = get_raw_index_for_bucket(bucket...); + m_rawData[index].add(1); + } + + /// Read value from given bucket + template + uint64_t read_bucket(T... bucket) const { + auto index = get_raw_index_for_bucket(bucket...); + return m_rawData[index].read(); + } + + /// Dump data to a Formatter object + void dump_formatted(ceph::Formatter *f) const { + // Dump axes configuration + f->open_array_section("axes"); + for (auto &ac : m_axes_config) { + dump_formatted_axis(f, ac); + } + f->close_section(); + + // Dump histogram values + dump_formatted_values(f); + } + +protected: + /// Raw data stored as linear space, internal indexes are calculated on + /// demand. + std::unique_ptr m_rawData; + + /// Configuration of axes + axis_config_d m_axes_config[DIM]; + + /// Dump histogram counters to a formatter + void dump_formatted_values(ceph::Formatter *f) const { + visit_values([f](int) { f->open_array_section("values"); }, + [f](int64_t value) { f->dump_unsigned("value", value); }, + [f](int) { f->close_section(); }); + } + + /// Get number of all histogram counters + int64_t get_raw_size() { + int64_t ret = 1; + for (int i = 0; i < DIM; ++i) { + ret *= m_axes_config[i].m_buckets; + } + return ret; + } + + /// Calculate m_rawData index from axis values + template + int64_t get_raw_index_for_value(T... axes) const { + static_assert(sizeof...(T) == DIM, "Incorrect number of arguments"); + return get_raw_index_internal<0>(get_bucket_for_axis, 0, axes...); + } + + /// Calculate m_rawData index from axis bucket numbers + template + int64_t get_raw_index_for_bucket(T... buckets) const { + static_assert(sizeof...(T) == DIM, "Incorrect number of arguments"); + return get_raw_index_internal<0>( + [](int64_t bucket, const axis_config_d &ac) { + assert(bucket >= 0 && "Bucket index can not be negative"); + assert(bucket < ac.m_buckets && "Bucket index too large"); + return bucket; + }, + 0, buckets...); + } + + template + int64_t get_raw_index_internal(F bucket_evaluator, int64_t startIndex, + int64_t value, T... tail) const { + static_assert(level + 1 + sizeof...(T) == DIM, + "Internal consistency check"); + auto &ac = m_axes_config[level]; + auto bucket = bucket_evaluator(value, ac); + return get_raw_index_internal( + bucket_evaluator, ac.m_buckets * startIndex + bucket, tail...); + } + + template + int64_t get_raw_index_internal(F, int64_t startIndex) const { + static_assert(level == DIM, "Internal consistency check"); + return startIndex; + } + + /// Visit all histogram counters, call onDimensionEnter / onDimensionLeave + /// when starting / finishing traversal + /// on given axis, call onValue when dumping raw histogram counter value. + template + void visit_values(FDE onDimensionEnter, FV onValue, FDL onDimensionLeave, + int level = 0, int startIndex = 0) const { + if (level == DIM) { + onValue(m_rawData[startIndex].read()); + return; + } + + onDimensionEnter(level); + auto &ac = m_axes_config[level]; + startIndex *= ac.m_buckets; + for (int32_t i = 0; i < ac.m_buckets; ++i, ++startIndex) { + visit_values(onDimensionEnter, onValue, onDimensionLeave, level + 1, + startIndex); + } + onDimensionLeave(level); + } +}; + +#endif diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 7afa5dc20055..f2c216fd689e 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4,6 +4,7 @@ * Ceph - scalable distributed file system * * Copyright (C) 2004-2006 Sage Weil + * Copyright (C) 2017 OVH * * This is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -2512,6 +2513,25 @@ void OSD::create_logger() PerfCountersBuilder osd_plb(cct, "osd", l_osd_first, l_osd_last); + // Latency axis configuration for op histograms, values are in nanoseconds + PerfHistogramCommon::axis_config_d op_hist_x_axis_config{ + "Latency (usec)", + PerfHistogramCommon::SCALE_LOG2, ///< Latency in logarithmic scale + 0, ///< Start at 0 + 100000, ///< Quantization unit is 100usec + 32, ///< Enough to cover much longer than slow requests + }; + + // Op size axis configuration for op histograms, values are in bytes + PerfHistogramCommon::axis_config_d op_hist_y_axis_config{ + "Request size (bytes)", + PerfHistogramCommon::SCALE_LOG2, ///< Request size in logarithmic scale + 0, ///< Start at 0 + 512, ///< Quantization unit is 512 bytes + 32, ///< Enough to cover requests larger than GB + }; + + osd_plb.add_u64(l_osd_op_wip, "op_wip", "Replication operations currently being processed (primary)"); // rep ops currently being processed (primary) osd_plb.add_u64_counter(l_osd_op, "op", @@ -2533,6 +2553,9 @@ void OSD::create_logger() "Client data read"); // client read out bytes osd_plb.add_time_avg(l_osd_op_r_lat, "op_r_latency", "Latency of read operation (including queue time)"); // client read latency + osd_plb.add_histogram(l_osd_op_r_lat_outb_hist, "op_r_latency_out_bytes_histogram", + op_hist_x_axis_config, op_hist_y_axis_config, + "Histogram of operation latency (including queue time) + data read"); osd_plb.add_time_avg(l_osd_op_r_process_lat, "op_r_process_latency", "Latency of read operation (excluding queue time)"); // client read process latency osd_plb.add_time_avg(l_osd_op_r_prepare_lat, "op_r_prepare_latency", @@ -2543,6 +2566,9 @@ void OSD::create_logger() "Client data written"); // client write in bytes osd_plb.add_time_avg(l_osd_op_w_lat, "op_w_latency", "Latency of write operation (including queue time)"); // client write latency + osd_plb.add_histogram(l_osd_op_w_lat_inb_hist, "op_w_latency_in_bytes_histogram", + op_hist_x_axis_config, op_hist_y_axis_config, + "Histogram of operation latency (including queue time) + data written"); osd_plb.add_time_avg(l_osd_op_w_process_lat, "op_w_process_latency", "Latency of write operation (excluding queue time)"); // client write process latency osd_plb.add_time_avg(l_osd_op_w_prepare_lat, "op_w_prepare_latency", @@ -2555,6 +2581,12 @@ void OSD::create_logger() "Client read-modify-write operations read out "); // client rmw out bytes osd_plb.add_time_avg(l_osd_op_rw_lat, "op_rw_latency", "Latency of read-modify-write operation (including queue time)"); // client rmw latency + osd_plb.add_histogram(l_osd_op_rw_lat_inb_hist, "op_rw_latency_in_bytes_histogram", + op_hist_x_axis_config, op_hist_y_axis_config, + "Histogram of rw operation latency (including queue time) + data written"); + osd_plb.add_histogram(l_osd_op_rw_lat_outb_hist, "op_rw_latency_out_bytes_histogram", + op_hist_x_axis_config, op_hist_y_axis_config, + "Histogram of rw operation latency (including queue time) + data read"); osd_plb.add_time_avg(l_osd_op_rw_process_lat, "op_rw_process_latency", "Latency of read-modify-write operation (excluding queue time)"); // client rmw process latency osd_plb.add_time_avg(l_osd_op_rw_prepare_lat, "op_rw_prepare_latency", diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 3d59c967ec72..45c66e66f945 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -70,17 +70,21 @@ enum { l_osd_op_r, l_osd_op_r_outb, l_osd_op_r_lat, + l_osd_op_r_lat_outb_hist, l_osd_op_r_process_lat, l_osd_op_r_prepare_lat, l_osd_op_w, l_osd_op_w_inb, l_osd_op_w_lat, + l_osd_op_w_lat_inb_hist, l_osd_op_w_process_lat, l_osd_op_w_prepare_lat, l_osd_op_rw, l_osd_op_rw_inb, l_osd_op_rw_outb, l_osd_op_rw_lat, + l_osd_op_rw_lat_inb_hist, + l_osd_op_rw_lat_outb_hist, l_osd_op_rw_process_lat, l_osd_op_rw_prepare_lat, diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index cfd88d4b5684..362ee75a8ce6 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -3189,16 +3189,20 @@ void PrimaryLogPG::log_op_stats(OpContext *ctx) osd->logger->inc(l_osd_op_rw_inb, inb); osd->logger->inc(l_osd_op_rw_outb, outb); osd->logger->tinc(l_osd_op_rw_lat, latency); + osd->logger->hinc(l_osd_op_rw_lat_inb_hist, latency.to_nsec(), inb); + osd->logger->hinc(l_osd_op_rw_lat_outb_hist, latency.to_nsec(), outb); osd->logger->tinc(l_osd_op_rw_process_lat, process_latency); } else if (op->may_read()) { osd->logger->inc(l_osd_op_r); osd->logger->inc(l_osd_op_r_outb, outb); osd->logger->tinc(l_osd_op_r_lat, latency); + osd->logger->hinc(l_osd_op_r_lat_outb_hist, latency.to_nsec(), outb); osd->logger->tinc(l_osd_op_r_process_lat, process_latency); } else if (op->may_write() || op->may_cache()) { osd->logger->inc(l_osd_op_w); osd->logger->inc(l_osd_op_w_inb, inb); osd->logger->tinc(l_osd_op_w_lat, latency); + osd->logger->hinc(l_osd_op_w_lat_inb_hist, latency.to_nsec(), inb); osd->logger->tinc(l_osd_op_w_process_lat, process_latency); } else ceph_abort(); diff --git a/src/test/common/CMakeLists.txt b/src/test/common/CMakeLists.txt index 51726638981f..35e3a7389d45 100644 --- a/src/test/common/CMakeLists.txt +++ b/src/test/common/CMakeLists.txt @@ -208,6 +208,13 @@ add_executable(unittest_shunique_lock add_ceph_unittest(unittest_shunique_lock ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_shunique_lock) target_link_libraries(unittest_shunique_lock global ${BLKID_LIBRARIES} ${EXTRALIBS}) +# unittest_perf_histogram +add_executable(unittest_perf_histogram + test_perf_histogram.cc + ) +add_ceph_unittest(unittest_perf_histogram ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_perf_histogram) +target_link_libraries(unittest_perf_histogram global ${BLKID_LIBRARIES} ${EXTRALIBS}) + # unittest_global_doublefree if(WITH_CEPHFS) add_executable(unittest_global_doublefree diff --git a/src/test/common/test_perf_histogram.cc b/src/test/common/test_perf_histogram.cc new file mode 100644 index 000000000000..ca52190da7c2 --- /dev/null +++ b/src/test/common/test_perf_histogram.cc @@ -0,0 +1,206 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 &smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 OVH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "common/perf_histogram.h" + +#include "gtest/gtest.h" + +template +class PerfHistogramAccessor : public PerfHistogram { +public: + typedef PerfHistogram Base; + + using Base::PerfHistogram; + + static int64_t get_bucket_for_axis( + int64_t value, const PerfHistogramCommon::axis_config_d& axis_config) { + return Base::get_bucket_for_axis(value, axis_config); + } + + static std::vector> get_axis_bucket_ranges( + const PerfHistogramCommon::axis_config_d& axis_config) { + return Base::get_axis_bucket_ranges(axis_config); + } + + template + void visit_values(F1 f1, F2 f2, F3 f3) { + Base::visit_values(f1, f2, f3); + } +}; + +TEST(PerfHistogram, GetBucketForAxis) { + PerfHistogramCommon::axis_config_d linear{ + "", PerfHistogramCommon::SCALE_LINEAR, 100, 3, 4}; + + ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(-1, linear)); + ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(0, linear)); + ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(99, linear)); + ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(100, linear)); + ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(101, linear)); + ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(102, linear)); + ASSERT_EQ(2, PerfHistogramAccessor<1>::get_bucket_for_axis(103, linear)); + ASSERT_EQ(2, PerfHistogramAccessor<1>::get_bucket_for_axis(105, linear)); + ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(106, linear)); + ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(108, linear)); + ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(109, linear)); + + ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis( + std::numeric_limits::min(), linear)); + ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis( + std::numeric_limits::max(), linear)); + + PerfHistogramCommon::axis_config_d logarithmic{ + "", PerfHistogramCommon::SCALE_LOG2, 100, 3, 5}; + + ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(-1, logarithmic)); + ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(0, logarithmic)); + ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(99, logarithmic)); + ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(100, logarithmic)); + ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(101, logarithmic)); + ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(102, logarithmic)); + ASSERT_EQ(2, PerfHistogramAccessor<1>::get_bucket_for_axis(103, logarithmic)); + ASSERT_EQ(2, PerfHistogramAccessor<1>::get_bucket_for_axis(105, logarithmic)); + ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(106, logarithmic)); + ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(111, logarithmic)); + ASSERT_EQ(4, PerfHistogramAccessor<1>::get_bucket_for_axis(112, logarithmic)); + ASSERT_EQ(4, PerfHistogramAccessor<1>::get_bucket_for_axis(124, logarithmic)); + + ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis( + std::numeric_limits::min(), logarithmic)); + ASSERT_EQ(4, PerfHistogramAccessor<1>::get_bucket_for_axis( + std::numeric_limits::max(), logarithmic)); +} + +static const int XS = 5; +static const int YS = 7; + +static const auto x_axis = PerfHistogramCommon::axis_config_d{ + "x", PerfHistogramCommon::SCALE_LINEAR, 0, 1, XS}; +static const auto y_axis = PerfHistogramCommon::axis_config_d{ + "y", PerfHistogramCommon::SCALE_LOG2, 0, 1, YS}; + +TEST(PerfHistogram, ZeroedInitially) { + PerfHistogramAccessor<2> h{x_axis, y_axis}; + for (int x = 0; x < XS; ++x) { + for (int y = 0; y < YS; ++y) { + ASSERT_EQ(0, h.read_bucket(x, y)); + } + } +} + +TEST(PerfHistogram, SimpleValues) { + PerfHistogramAccessor<2> h{x_axis, y_axis}; + ASSERT_EQ(0, h.read_bucket(1, 1)); + h.inc(0, 0); + ASSERT_EQ(1, h.read_bucket(1, 1)); + + ASSERT_EQ(0, h.read_bucket(2, 2)); + h.inc(1, 1); + ASSERT_EQ(1, h.read_bucket(2, 2)); + + ASSERT_EQ(0, h.read_bucket(3, 3)); + h.inc(2, 2); + ASSERT_EQ(1, h.read_bucket(3, 3)); + + ASSERT_EQ(0, h.read_bucket(4, 3)); + h.inc(3, 3); + ASSERT_EQ(1, h.read_bucket(4, 3)); +} + +TEST(PerfHistogram, OneBucketRange) { + auto ranges = PerfHistogramAccessor<1>::get_axis_bucket_ranges( + PerfHistogramCommon::axis_config_d{"", PerfHistogramCommon::SCALE_LINEAR, + 0, 1, 1}); + + ASSERT_EQ(1, ranges.size()); + ASSERT_EQ(std::numeric_limits::min(), ranges[0].first); + ASSERT_EQ(std::numeric_limits::max(), ranges[0].second); +} + +TEST(PerfHistogram, TwoBucketRange) { + auto ranges = PerfHistogramAccessor<1>::get_axis_bucket_ranges( + PerfHistogramCommon::axis_config_d{"", PerfHistogramCommon::SCALE_LINEAR, + 0, 1, 2}); + + ASSERT_EQ(2, ranges.size()); + ASSERT_EQ(std::numeric_limits::min(), ranges[0].first); + ASSERT_EQ(-1, ranges[0].second); + ASSERT_EQ(0, ranges[1].first); + ASSERT_EQ(std::numeric_limits::max(), ranges[1].second); +} + +TEST(PerfHistogram, LinearBucketRange) { + PerfHistogramCommon::axis_config_d ac{"", PerfHistogramCommon::SCALE_LINEAR, + 100, 10, 15}; + auto ranges = PerfHistogramAccessor<1>::get_axis_bucket_ranges(ac); + + for (size_t i = 0; i < ranges.size(); ++i) { + ASSERT_EQ( + i, PerfHistogramAccessor<1>::get_bucket_for_axis(ranges[i].first, ac)); + ASSERT_EQ( + i, PerfHistogramAccessor<1>::get_bucket_for_axis(ranges[i].second, ac)); + } + + for (size_t i = 1; i < ranges.size(); ++i) { + ASSERT_EQ(ranges[i].first, ranges[i - 1].second + 1); + } +} + +TEST(PerfHistogram, LogarithmicBucketRange) { + PerfHistogramCommon::axis_config_d ac{"", PerfHistogramCommon::SCALE_LOG2, + 100, 10, 15}; + auto ranges = PerfHistogramAccessor<1>::get_axis_bucket_ranges(ac); + + for (size_t i = 0; i < ranges.size(); ++i) { + ASSERT_EQ( + i, PerfHistogramAccessor<1>::get_bucket_for_axis(ranges[i].first, ac)); + ASSERT_EQ( + i, PerfHistogramAccessor<1>::get_bucket_for_axis(ranges[i].second, ac)); + } + + for (size_t i = 1; i < ranges.size(); ++i) { + ASSERT_EQ(ranges[i].first, ranges[i - 1].second + 1); + } +} + +TEST(PerfHistogram, AxisAddressing) { + PerfHistogramCommon::axis_config_d ac1{"", PerfHistogramCommon::SCALE_LINEAR, + 0, 1, 7}; + PerfHistogramCommon::axis_config_d ac2{"", PerfHistogramCommon::SCALE_LINEAR, + 0, 1, 9}; + PerfHistogramCommon::axis_config_d ac3{"", PerfHistogramCommon::SCALE_LINEAR, + 0, 1, 11}; + + PerfHistogramAccessor<3> h{ac1, ac2, ac3}; + + h.inc(1, 2, 3); // Should end up in buckets 2, 3, 4 + h.inc_bucket(4, 5, 6); + + std::vector rawValues; + h.visit_values([](int) {}, + [&rawValues](int64_t value) { rawValues.push_back(value); }, + [](int) {}); + + for (size_t i = 0; i < rawValues.size(); ++i) { + switch (i) { + case 4 + 11 * (3 + 9 * 2): + case 6 + 11 * (5 + 9 * 4): + ASSERT_EQ(1, rawValues[i]); + break; + default: + ASSERT_EQ(0, rawValues[i]); + break; + } + } +} diff --git a/src/tools/histogram_dump.py b/src/tools/histogram_dump.py new file mode 100755 index 000000000000..bafc24b00ea7 --- /dev/null +++ b/src/tools/histogram_dump.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# coding: utf-8 +# +# Ceph - scalable distributed file system +# +# Copyright (C) 2017 OVH +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public +# License version 2, as published by the Free Software +# Foundation. See file COPYING. +# + +import json +import subprocess +import time +import os +import argparse + + +def shorten(val): + if isinstance(val, str): + return val + for u in ((3, ''), (6, 'k'), (9, 'M'), (12, 'G'), (15, 'T')): + if val < 10**u[0]: + return "{}{}".format(int(val / (10 ** (u[0]-3))), u[1]) + return val + + +def print_histogram(asok, logger, counter, last): + + try: + out = subprocess.check_output( + "ceph --admin-daemon {} perf histogram dump".format(asok), + shell=True) + j = json.loads(out.decode('utf-8')) + except Exception as e: + return (last, + "Couldn't connect to admin socket, result: \n{}".format(e)) + + current = j['osd'][counter]['values'] + axes = j['osd'][counter]['axes'] + content = "" + + content += "{}:\n".format(axes[1]['name']) + for r in axes[1]['ranges']: + content += "{0: >4} ".format( + shorten(r['min']) if 'min' in r else '') + content += "\n" + for r in axes[1]['ranges']: + content += "{0: >4} ".format( + shorten(r['max']) if 'max' in r else '') + content += "\n" + + content += ("{0: >"+str(len(axes[1]['ranges'])*5+14)+"}:\n").format( + axes[0]['name']) + + for i in range(len(current)): + for j in range(len(current[i])): + try: + diff = current[i][j] - last[i][j] + except IndexError: + diff = '-' + content += "{0: >4} ".format(shorten(diff)) + + r = axes[0]['ranges'][i] + content += "{0: >6} : {1}\n".format( + shorten(r['min']) if 'min' in r else '', + shorten(r['max']) if 'max' in r else '') + return (current, content) + + +def loop_print(asok, logger, counter): + last = [] + while True: + + last, content = print_histogram(asok, logger, counter, last) + print("{}{}".format("\n"*100, content)) + time.sleep(1) + + +def main(): + parser = argparse.ArgumentParser( + description='Continuously display ceph performance histogram') + parser.add_argument( + '--asok', + type=str, + default='/var/run/ceph/*.asok', + help='Path to asok file, can use wildcards') + parser.add_argument( + '--logger', + type=str, + default='osd') + parser.add_argument( + '--counter', + type=str, + default='op_w_latency_in_bytes_histogram') + args = parser.parse_args() + + loop_print(args.asok, args.logger, args.counter) + + +if __name__ == '__main__': + main()