This change adds new performance counter type: histogram.
Currently it measures latency and request size (in bytes) of
an op, similarly to l_osd_op_(r|w|rw)_lat and
l_osd_op_(r|w|rw)_(inb|outb). Histograms are 2-dimensional and
gather probes for both values at the same time. This allows
a more detailed analysis than what could be done with two
separate 1-D histograms. Still the memory footprint is negligible.
Since new data could break existing tools and the amount of data
dumped is rather large, two new admin socket commands are
introduced: perf histogram schema and perf histogram dump.
Invocation of the original commands remains compatible.
There's also a new simple tool in src/tools/histogram_dump.py
which performs a live dump of one histogram from a local daemon.
Current configuration of histograms is hard-coded and should
cover all common cases. If it turns out to be useful, in the
future it could be loaded from the configuration.
Signed-off-by: Bartłomiej Święcki <bartlomiej.swiecki@corp.ovh.com>
common/PrebufferedStreambuf.cc
common/BackTrace.cc
common/perf_counters.cc
+ common/perf_histogram.cc
common/mutex_debug.cc
common/Mutex.cc
common/OutputDataSocket.cc
* Ceph - scalable distributed file system
*
* Copyright (C) 2011 New Dream Network
+ * Copyright (C) 2017 OVH
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
command == "perf schema") {
_perf_counters_collection->dump_formatted(f, true);
}
+ else if (command == "perf histogram dump") {
+ std::string logger;
+ std::string counter;
+ cmd_getval(this, cmdmap, "logger", logger);
+ cmd_getval(this, cmdmap, "counter", counter);
+ _perf_counters_collection->dump_formatted_histograms(f, false, logger,
+ counter);
+ }
+ else if (command == "perf histogram schema") {
+ _perf_counters_collection->dump_formatted_histograms(f, true);
+ }
else if (command == "perf reset") {
std::string var;
string section = command;
_admin_socket->register_command("1", "1", _admin_hook, "");
_admin_socket->register_command("perf dump", "perf dump name=logger,type=CephString,req=false name=counter,type=CephString,req=false", _admin_hook, "dump perfcounters value");
_admin_socket->register_command("perfcounters_schema", "perfcounters_schema", _admin_hook, "");
+ _admin_socket->register_command("perf histogram dump", "perf histogram dump name=logger,type=CephString,req=false name=counter,type=CephString,req=false", _admin_hook, "dump perf histogram values");
_admin_socket->register_command("2", "2", _admin_hook, "");
_admin_socket->register_command("perf schema", "perf schema", _admin_hook, "dump perfcounters schema");
+ _admin_socket->register_command("perf histogram schema", "perf histogram schema", _admin_hook, "dump perf histogram schema");
_admin_socket->register_command("perf reset", "perf reset name=var,type=CephString", _admin_hook, "perf reset <name>: perf reset all or one perfcounter name");
_admin_socket->register_command("config show", "config show", _admin_hook, "dump current config settings");
_admin_socket->register_command("config set", "config set name=var,type=CephString name=val,type=CephString,n=N", _admin_hook, "config set <field> <val> [<val> ...]: set a config variable");
* Ceph - scalable distributed file system
*
* Copyright (C) 2011 New Dream Network
+ * Copyright (C) 2017 OVH
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* @param counter name of counter within subsystem, e.g. "num_strays",
* may be empty.
* @param schema if true, output schema instead of current data.
+ * @param histograms if true, dump histogram values,
+ * if false dump all non-histogram counters
*/
-void PerfCountersCollection::dump_formatted(
+void PerfCountersCollection::dump_formatted_generic(
Formatter *f,
bool schema,
+ bool histograms,
const std::string &logger,
const std::string &counter)
{
l != m_loggers.end(); ++l) {
// Optionally filter on logger name, pass through counter filter
if (logger.empty() || (*l)->get_name() == logger) {
- (*l)->dump_formatted(f, schema, counter);
+ (*l)->dump_formatted_generic(f, schema, histograms, counter);
}
}
f->close_section();
return utime_t(v / 1000000000ull, v % 1000000000ull);
}
+// Record one probe in a 2-D histogram counter: increments by one the bucket
+// selected by the pair of axis values (x, y).
+void PerfCounters::hinc(int idx, int64_t x, int64_t y)
+{
+  // Perf counters can be disabled globally through configuration.
+  if (!m_cct->_conf->perf)
+    return;
+
+  // idx must lie strictly inside this counter set's id range.
+  assert(idx > m_lower_bound);
+  assert(idx < m_upper_bound);
+
+  perf_counter_data_any_d& data(m_data[idx - m_lower_bound - 1]);
+  // hinc() is only valid on counters registered via add_histogram().
+  assert(data.type == (PERFCOUNTER_HISTOGRAM | PERFCOUNTER_U64));
+  assert(data.histogram);
+
+  data.histogram->inc(x, y);
+}
+
pair<uint64_t, uint64_t> PerfCounters::get_tavg_ms(int idx) const
{
if (!m_cct->_conf->perf)
}
}
-void PerfCounters::dump_formatted(Formatter *f, bool schema,
- const std::string &counter)
+void PerfCounters::dump_formatted_generic(Formatter *f, bool schema,
+ bool histograms, const std::string &counter)
{
f->open_object_section(m_name.c_str());
continue;
}
+ // Switch between normal and histogram view
+ bool is_histogram = (d->type & PERFCOUNTER_HISTOGRAM) != 0;
+ if (is_histogram != histograms) {
+ continue;
+ }
+
if (schema) {
f->open_object_section(d->name);
f->dump_int("type", d->type);
ceph_abort();
}
f->close_section();
+ } else if (d->type & PERFCOUNTER_HISTOGRAM) {
+ assert(d->type == (PERFCOUNTER_HISTOGRAM | PERFCOUNTER_U64));
+ assert(d->histogram);
+ f->open_object_section(d->name);
+ d->histogram->dump_formatted(f);
+ f->close_section();
} else {
uint64_t v = d->u64.read();
if (d->type & PERFCOUNTER_U64) {
add_impl(idx, name, description, nick, PERFCOUNTER_TIME | PERFCOUNTER_LONGRUNAVG);
}
+void PerfCountersBuilder::add_histogram(int idx, const char *name,
+ PerfHistogramCommon::axis_config_d x_axis_config,
+ PerfHistogramCommon::axis_config_d y_axis_config,
+ const char *description, const char *nick)
+{
+ add_impl(idx, name, description, nick, PERFCOUNTER_U64 | PERFCOUNTER_HISTOGRAM,
+ unique_ptr<PerfHistogram<>>{new PerfHistogram<>{x_axis_config, y_axis_config}});
+}
+
void PerfCountersBuilder::add_impl(int idx, const char *name,
- const char *description, const char *nick, int ty)
+ const char *description, const char *nick, int ty,
+ unique_ptr<PerfHistogram<>> histogram)
{
assert(idx > m_perf_counters->m_lower_bound);
assert(idx < m_perf_counters->m_upper_bound);
data.description = description;
data.nick = nick;
data.type = (enum perfcounter_type_d)ty;
+ data.histogram = std::move(histogram);
}
PerfCounters *PerfCountersBuilder::create_perf_counters()
* Ceph - scalable distributed file system
*
* Copyright (C) 2011 New Dream Network
+ * Copyright (C) 2017 OVH
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
#define CEPH_COMMON_PERF_COUNTERS_H
#include "common/config_obs.h"
+#include "common/perf_histogram.h"
#include "common/Mutex.h"
#include "include/utime.h"
#include <stdint.h>
#include <string>
#include <vector>
+#include <memory>
class CephContext;
class PerfCountersBuilder;
PERFCOUNTER_U64 = 0x2,
PERFCOUNTER_LONGRUNAVG = 0x4,
PERFCOUNTER_COUNTER = 0x8,
+ PERFCOUNTER_HISTOGRAM = 0x10,
};
+
/*
* A PerfCounters object is usually associated with a single subsystem.
* It contains counters which we modify to track performance and throughput
* 1) integer values & counters
* 2) floating-point values & counters
* 3) floating-point averages
+ * 4) 2D histograms of quantized value pairs
*
- * The difference between values and counters is in how they are initialized
+ * The difference between values, counters and histograms is in how they are initialized
* and accessed. For a counter, use the inc(counter, amount) function (note
* that amount defaults to 1 if you don't set it). For a value, use the
- * set(index, value) function.
+ * set(index, value) function. For histogram use the hinc(value1, value2) function.
* (For time, use the tinc and tset variants.)
*
* If for some reason you would like to reset your counters, you can do so using
u64.set(a.first);
avgcount.set(a.second);
avgcount2.set(a.second);
+ if (other.histogram) {
+ histogram.reset(new PerfHistogram<>(*other.histogram));
+ }
}
const char *name;
atomic64_t u64;
atomic64_t avgcount;
atomic64_t avgcount2;
+ std::unique_ptr<PerfHistogram<>> histogram;
void reset()
{
avgcount.set(0);
avgcount2.set(0);
}
- }
-
- perf_counter_data_any_d& operator=(const perf_counter_data_any_d& other) {
- name = other.name;
- description = other.description;
- nick = other.nick;
- type = other.type;
- pair<uint64_t,uint64_t> a = other.read_avg();
- u64.set(a.first);
- avgcount.set(a.second);
- avgcount2.set(a.second);
- return *this;
+ if (histogram) {
+ histogram->reset();
+ }
}
/// read <sum, count> safely
void tinc(int idx, ceph::timespan v);
utime_t tget(int idx) const;
+ void hinc(int idx, int64_t x, int64_t y);
+
void reset();
void dump_formatted(ceph::Formatter *f, bool schema,
- const std::string &counter = "");
+ const std::string &counter = "") {
+ dump_formatted_generic(f, schema, false, counter);
+ }
+ void dump_formatted_histograms(ceph::Formatter *f, bool schema,
+ const std::string &counter = "") {
+ dump_formatted_generic(f, schema, true, counter);
+ }
pair<uint64_t, uint64_t> get_tavg_ms(int idx) const;
const std::string& get_name() const;
int lower_bound, int upper_bound);
PerfCounters(const PerfCounters &rhs);
PerfCounters& operator=(const PerfCounters &rhs);
+ void dump_formatted_generic(ceph::Formatter *f, bool schema, bool histograms,
+ const std::string &counter = "");
typedef std::vector<perf_counter_data_any_d> perf_counter_data_vec_t;
void remove(class PerfCounters *l);
void clear();
bool reset(const std::string &name);
- void dump_formatted(
- ceph::Formatter *f,
- bool schema,
- const std::string &logger = "",
- const std::string &counter = "");
+
+ void dump_formatted(ceph::Formatter *f, bool schema,
+ const std::string &logger = "",
+ const std::string &counter = "") {
+ dump_formatted_generic(f, schema, false, logger, counter);
+ }
+
+ void dump_formatted_histograms(ceph::Formatter *f, bool schema,
+ const std::string &logger = "",
+ const std::string &counter = "") {
+ dump_formatted_generic(f, schema, true, logger, counter);
+ }
typedef std::map<std::string,
PerfCounters::perf_counter_data_any_d *> CounterMap;
void with_counters(std::function<void(const CounterMap &)>) const;
private:
+ void dump_formatted_generic(ceph::Formatter *f, bool schema, bool histograms,
+ const std::string &logger = "",
+ const std::string &counter = "");
+
CephContext *m_cct;
/** Protects m_loggers */
const char *description=NULL, const char *nick = NULL);
void add_time_avg(int key, const char *name,
const char *description=NULL, const char *nick = NULL);
+ void add_histogram(int key, const char* name,
+ PerfHistogramCommon::axis_config_d x_axis_config,
+ PerfHistogramCommon::axis_config_d y_axis_config,
+ const char *description=NULL, const char* nick = NULL);
PerfCounters* create_perf_counters();
private:
PerfCountersBuilder(const PerfCountersBuilder &rhs);
PerfCountersBuilder& operator=(const PerfCountersBuilder &rhs);
void add_impl(int idx, const char *name,
- const char *description, const char *nick, int ty);
+ const char *description, const char *nick, int ty,
+ unique_ptr<PerfHistogram<>> histogram = nullptr);
PerfCounters *m_perf_counters;
};
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 OVH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/perf_histogram.h"
+
+#include <limits>
+
+// Emit the schema of a single axis: its static configuration followed by the
+// inclusive value range covered by each of its buckets.
+void PerfHistogramCommon::dump_formatted_axis(
+    ceph::Formatter *f, const PerfHistogramCommon::axis_config_d &ac) {
+  f->open_object_section("axis");
+
+  // Dump axis configuration
+  f->dump_string("name", ac.m_name);
+  f->dump_int("min", ac.m_min);
+  f->dump_int("quant_size", ac.m_quant_size);
+  f->dump_int("buckets", ac.m_buckets);
+  switch (ac.m_scale_type) {
+    case SCALE_LINEAR:
+      f->dump_string("scale_type", "linear");
+      break;
+    case SCALE_LOG2:
+      f->dump_string("scale_type", "log2");
+      break;
+    default:
+      assert(false && "Invalid scale type");
+  }
+
+  {
+    // Dump concrete ranges for axis buckets
+    f->open_array_section("ranges");
+    auto ranges = get_axis_bucket_ranges(ac);
+    for (int i = 0; i < ac.m_buckets; ++i) {
+      f->open_object_section("bucket");
+      // The first bucket is unbounded below and the last unbounded above,
+      // so "min" / "max" are omitted for them respectively.
+      if (i > 0) {
+        f->dump_int("min", ranges[i].first);
+      }
+      if (i < ac.m_buckets - 1) {
+        f->dump_int("max", ranges[i].second);
+      }
+      f->close_section();
+    }
+    f->close_section();
+  }
+
+  f->close_section();
+}
+
+// Translate bucket index i into a multiple of the quantization unit at which
+// that bucket starts: linear axes grow by one unit per bucket, log2 axes
+// double per bucket. Internal helper used by get_bucket_for_axis() and
+// get_axis_bucket_ranges().
+int64_t get_quants(int64_t i, PerfHistogramCommon::scale_type_d st) {
+  switch (st) {
+    case PerfHistogramCommon::SCALE_LINEAR:
+      return i;
+    case PerfHistogramCommon::SCALE_LOG2:
+      return int64_t(1) << (i - 1);
+  }
+  assert(false && "Invalid scale type");
+  // Unreachable; prevents undefined behaviour (flowing off the end of a
+  // value-returning function) when NDEBUG compiles the assert away.
+  return 0;
+}
+
+// Quantize a raw value into a bucket index on the given axis.
+// Bucket 0 collects everything below the axis minimum; the last bucket
+// absorbs everything beyond the configured range.
+int64_t PerfHistogramCommon::get_bucket_for_axis(
+    int64_t value, const PerfHistogramCommon::axis_config_d &ac) {
+  if (value < ac.m_min) {
+    return 0;
+  }
+
+  // Shift into axis-local coordinates and quantize.
+  value -= ac.m_min;
+  value /= ac.m_quant_size;
+
+  switch (ac.m_scale_type) {
+    case SCALE_LINEAR:
+      // One quantization unit per bucket.
+      return std::min<int64_t>(value + 1, ac.m_buckets - 1);
+
+    case SCALE_LOG2:
+      // Bucket i covers quantized values below 2^(i-1), i.e. bucket widths
+      // double as i grows.
+      for (int64_t i = 1; i < ac.m_buckets; ++i) {
+        if (value < get_quants(i, SCALE_LOG2)) {
+          return i;
+        }
+      }
+      return ac.m_buckets - 1;
+  }
+  assert(false && "Invalid scale type");
+  // Unreachable; prevents undefined behaviour (flowing off the end of a
+  // value-returning function) when NDEBUG compiles the assert away.
+  return 0;
+}
+
+// Compute the inclusive [min, max] value range of every bucket on an axis.
+// Ranges tile the whole int64_t domain: bucket 0 is unbounded below and the
+// last bucket is unbounded above.
+std::vector<std::pair<int64_t, int64_t>>
+PerfHistogramCommon::get_axis_bucket_ranges(
+    const PerfHistogramCommon::axis_config_d &ac) {
+  std::vector<std::pair<int64_t, int64_t>> ret;
+  ret.resize(ac.m_buckets);
+
+  // First bucket is for value < min
+  int64_t min = ac.m_min;
+  for (int64_t i = 1; i < ac.m_buckets - 1; i++) {
+    // Exclusive upper bound of bucket i, derived from the axis scale.
+    int64_t max_exclusive =
+        ac.m_min + get_quants(i, ac.m_scale_type) * ac.m_quant_size;
+
+    // Dump bucket range
+    ret[i].first = min;
+    ret[i].second = max_exclusive - 1;
+
+    // Shift min to next bucket
+    min = max_exclusive;
+  }
+
+  // Fill up first and last element, note that in case m_buckets == 1
+  // those will point to the same element, the order is important here
+  ret.front().second = ac.m_min - 1;
+  ret.back().first = min;
+
+  ret.front().first = std::numeric_limits<int64_t>::min();
+  ret.back().second = std::numeric_limits<int64_t>::max();
+  return ret;
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 OVH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_COMMON_PERF_HISTOGRAM_H
+#define CEPH_COMMON_PERF_HISTOGRAM_H
+
+#include "common/Formatter.h"
+#include "include/atomic.h"
+#include "include/int_types.h"
+
+#include <memory>
+
+/// Non-template base shared by all PerfHistogram instantiations: holds the
+/// axis configuration types plus the bucket-lookup and schema-dump helpers.
+class PerfHistogramCommon {
+public:
+  enum scale_type_d : uint8_t {
+    SCALE_LINEAR = 1, ///< Buckets of equal width
+    SCALE_LOG2 = 2,   ///< Bucket width doubles with each bucket
+  };
+
+  struct axis_config_d {
+    const char *m_name;        ///< Axis name shown in dumps
+    scale_type_d m_scale_type; ///< Scale of the axis (linear / log2)
+    int64_t m_min;             ///< Values below this land in bucket 0
+    int64_t m_quant_size;      ///< Quantization unit (width of one step)
+    int32_t m_buckets;         ///< Total number of buckets on the axis
+  };
+
+protected:
+  /// Dump configuration of one axis to a formatter
+  static void dump_formatted_axis(ceph::Formatter *f, const axis_config_d &ac);
+
+  /// Quantize given value and convert to bucket number on given axis
+  static int64_t get_bucket_for_axis(int64_t value, const axis_config_d &ac);
+
+  /// Calculate inclusive ranges of axis values for each bucket on that axis
+  static std::vector<std::pair<int64_t, int64_t>> get_axis_bucket_ranges(
+      const axis_config_d &ac);
+};
+
+/// PerfHistogram traces a histogram of input values. It's an extended
+/// version of a standard histogram, which traces characteristics of a single
+/// value only. In this implementation, values can be traced in multiple
+/// dimensions - i.e. we can create a histogram of input request size (first
+/// dimension) and processing latency (second dimension). Creating a standard
+/// histogram out of such a multidimensional one is trivial and requires
+/// summing values across the dimensions we're not interested in.
+template <int DIM = 2>
+class PerfHistogram : public PerfHistogramCommon {
+public:
+  /// Initialize new histogram object
+  PerfHistogram(std::initializer_list<axis_config_d> axes_config) {
+    assert(axes_config.size() == static_cast<size_t>(DIM) &&
+           "Invalid number of axis configuration objects");
+
+    int i = 0;
+    for (const auto &ac : axes_config) {
+      assert(ac.m_buckets > 0 && "Must have at least one bucket on axis");
+      assert(ac.m_quant_size > 0 &&
+             "Quantization unit must be non-zero positive integer value");
+
+      m_axes_config[i++] = ac;
+    }
+
+    m_rawData.reset(new atomic64_t[get_raw_size()]);
+  }
+
+  /// Copy from other histogram object
+  PerfHistogram(const PerfHistogram &other) {
+    // A raw-array member cannot be copy-initialized in the mem-initializer
+    // list (ill-formed C++), so copy the axis configuration element by
+    // element here, before get_raw_size() reads it.
+    for (int i = 0; i < DIM; ++i) {
+      m_axes_config[i] = other.m_axes_config[i];
+    }
+
+    int64_t size = get_raw_size();
+    m_rawData.reset(new atomic64_t[size]);
+    for (int64_t i = 0; i < size; i++) {
+      m_rawData[i].set(other.m_rawData[i].read());
+    }
+  }
+
+  /// Set all histogram values to 0
+  void reset() {
+    auto size = get_raw_size();
+    for (auto i = size; --i >= 0;) {
+      m_rawData[i].set(0);
+    }
+  }
+
+  /// Increase counter for given axis values by one
+  template <typename... T>
+  void inc(T... axis) {
+    auto index = get_raw_index_for_value(axis...);
+    m_rawData[index].add(1);
+  }
+
+  /// Increase counter for given axis buckets by one
+  template <typename... T>
+  void inc_bucket(T... bucket) {
+    auto index = get_raw_index_for_bucket(bucket...);
+    m_rawData[index].add(1);
+  }
+
+  /// Read value from given bucket
+  template <typename... T>
+  uint64_t read_bucket(T... bucket) const {
+    auto index = get_raw_index_for_bucket(bucket...);
+    return m_rawData[index].read();
+  }
+
+  /// Dump data to a Formatter object
+  void dump_formatted(ceph::Formatter *f) const {
+    // Dump axes configuration
+    f->open_array_section("axes");
+    for (auto &ac : m_axes_config) {
+      dump_formatted_axis(f, ac);
+    }
+    f->close_section();
+
+    // Dump histogram values
+    dump_formatted_values(f);
+  }
+
+protected:
+  /// Raw data stored as linear space, internal indexes are calculated on
+  /// demand.
+  std::unique_ptr<atomic64_t[]> m_rawData;
+
+  /// Configuration of axes
+  axis_config_d m_axes_config[DIM];
+
+  /// Dump histogram counters to a formatter
+  void dump_formatted_values(ceph::Formatter *f) const {
+    visit_values([f](int) { f->open_array_section("values"); },
+                 [f](int64_t value) { f->dump_unsigned("value", value); },
+                 [f](int) { f->close_section(); });
+  }
+
+  /// Get number of all histogram counters
+  int64_t get_raw_size() const {
+    int64_t ret = 1;
+    for (int i = 0; i < DIM; ++i) {
+      ret *= m_axes_config[i].m_buckets;
+    }
+    return ret;
+  }
+
+  /// Calculate m_rawData index from axis values
+  template <typename... T>
+  int64_t get_raw_index_for_value(T... axes) const {
+    static_assert(sizeof...(T) == DIM, "Incorrect number of arguments");
+    return get_raw_index_internal<0>(get_bucket_for_axis, 0, axes...);
+  }
+
+  /// Calculate m_rawData index from axis bucket numbers
+  template <typename... T>
+  int64_t get_raw_index_for_bucket(T... buckets) const {
+    static_assert(sizeof...(T) == DIM, "Incorrect number of arguments");
+    return get_raw_index_internal<0>(
+        [](int64_t bucket, const axis_config_d &ac) {
+          assert(bucket >= 0 && "Bucket index can not be negative");
+          assert(bucket < ac.m_buckets && "Bucket index too large");
+          return bucket;
+        },
+        0, buckets...);
+  }
+
+  /// Fold axis values into a single row-major index: each recursion level
+  /// multiplies the accumulated index by its axis size and adds the bucket
+  /// produced by bucket_evaluator for that axis.
+  template <int level = 0, typename F, typename... T>
+  int64_t get_raw_index_internal(F bucket_evaluator, int64_t startIndex,
+                                 int64_t value, T... tail) const {
+    static_assert(level + 1 + sizeof...(T) == DIM,
+                  "Internal consistency check");
+    auto &ac = m_axes_config[level];
+    auto bucket = bucket_evaluator(value, ac);
+    return get_raw_index_internal<level + 1>(
+        bucket_evaluator, ac.m_buckets * startIndex + bucket, tail...);
+  }
+
+  /// Recursion terminator: all axes consumed, startIndex is the final index.
+  template <int level, typename F>
+  int64_t get_raw_index_internal(F, int64_t startIndex) const {
+    static_assert(level == DIM, "Internal consistency check");
+    return startIndex;
+  }
+
+  /// Visit all histogram counters, call onDimensionEnter / onDimensionLeave
+  /// when starting / finishing traversal
+  /// on given axis, call onValue when dumping raw histogram counter value.
+  template <typename FDE, typename FV, typename FDL>
+  void visit_values(FDE onDimensionEnter, FV onValue, FDL onDimensionLeave,
+                    int level = 0, int startIndex = 0) const {
+    if (level == DIM) {
+      onValue(m_rawData[startIndex].read());
+      return;
+    }
+
+    onDimensionEnter(level);
+    auto &ac = m_axes_config[level];
+    startIndex *= ac.m_buckets;
+    for (int32_t i = 0; i < ac.m_buckets; ++i, ++startIndex) {
+      visit_values(onDimensionEnter, onValue, onDimensionLeave, level + 1,
+                   startIndex);
+    }
+    onDimensionLeave(level);
+  }
+};
+
+#endif
* Ceph - scalable distributed file system
*
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ * Copyright (C) 2017 OVH
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
PerfCountersBuilder osd_plb(cct, "osd", l_osd_first, l_osd_last);
+ // Latency axis configuration for op histograms, values are in nanoseconds
+ PerfHistogramCommon::axis_config_d op_hist_x_axis_config{
+ "Latency (usec)",
+ PerfHistogramCommon::SCALE_LOG2, ///< Latency in logarithmic scale
+ 0, ///< Start at 0
+ 100000, ///< Quantization unit is 100usec
+ 32, ///< Enough to cover much longer than slow requests
+ };
+
+ // Op size axis configuration for op histograms, values are in bytes
+ PerfHistogramCommon::axis_config_d op_hist_y_axis_config{
+ "Request size (bytes)",
+ PerfHistogramCommon::SCALE_LOG2, ///< Request size in logarithmic scale
+ 0, ///< Start at 0
+ 512, ///< Quantization unit is 512 bytes
+ 32, ///< Enough to cover requests larger than GB
+ };
+
+
osd_plb.add_u64(l_osd_op_wip, "op_wip",
"Replication operations currently being processed (primary)"); // rep ops currently being processed (primary)
osd_plb.add_u64_counter(l_osd_op, "op",
"Client data read"); // client read out bytes
osd_plb.add_time_avg(l_osd_op_r_lat, "op_r_latency",
"Latency of read operation (including queue time)"); // client read latency
+ osd_plb.add_histogram(l_osd_op_r_lat_outb_hist, "op_r_latency_out_bytes_histogram",
+ op_hist_x_axis_config, op_hist_y_axis_config,
+ "Histogram of operation latency (including queue time) + data read");
osd_plb.add_time_avg(l_osd_op_r_process_lat, "op_r_process_latency",
"Latency of read operation (excluding queue time)"); // client read process latency
osd_plb.add_time_avg(l_osd_op_r_prepare_lat, "op_r_prepare_latency",
"Client data written"); // client write in bytes
osd_plb.add_time_avg(l_osd_op_w_lat, "op_w_latency",
"Latency of write operation (including queue time)"); // client write latency
+ osd_plb.add_histogram(l_osd_op_w_lat_inb_hist, "op_w_latency_in_bytes_histogram",
+ op_hist_x_axis_config, op_hist_y_axis_config,
+ "Histogram of operation latency (including queue time) + data written");
osd_plb.add_time_avg(l_osd_op_w_process_lat, "op_w_process_latency",
"Latency of write operation (excluding queue time)"); // client write process latency
osd_plb.add_time_avg(l_osd_op_w_prepare_lat, "op_w_prepare_latency",
"Client read-modify-write operations read out "); // client rmw out bytes
osd_plb.add_time_avg(l_osd_op_rw_lat, "op_rw_latency",
"Latency of read-modify-write operation (including queue time)"); // client rmw latency
+ osd_plb.add_histogram(l_osd_op_rw_lat_inb_hist, "op_rw_latency_in_bytes_histogram",
+ op_hist_x_axis_config, op_hist_y_axis_config,
+ "Histogram of rw operation latency (including queue time) + data written");
+ osd_plb.add_histogram(l_osd_op_rw_lat_outb_hist, "op_rw_latency_out_bytes_histogram",
+ op_hist_x_axis_config, op_hist_y_axis_config,
+ "Histogram of rw operation latency (including queue time) + data read");
osd_plb.add_time_avg(l_osd_op_rw_process_lat, "op_rw_process_latency",
"Latency of read-modify-write operation (excluding queue time)"); // client rmw process latency
osd_plb.add_time_avg(l_osd_op_rw_prepare_lat, "op_rw_prepare_latency",
l_osd_op_r,
l_osd_op_r_outb,
l_osd_op_r_lat,
+ l_osd_op_r_lat_outb_hist,
l_osd_op_r_process_lat,
l_osd_op_r_prepare_lat,
l_osd_op_w,
l_osd_op_w_inb,
l_osd_op_w_lat,
+ l_osd_op_w_lat_inb_hist,
l_osd_op_w_process_lat,
l_osd_op_w_prepare_lat,
l_osd_op_rw,
l_osd_op_rw_inb,
l_osd_op_rw_outb,
l_osd_op_rw_lat,
+ l_osd_op_rw_lat_inb_hist,
+ l_osd_op_rw_lat_outb_hist,
l_osd_op_rw_process_lat,
l_osd_op_rw_prepare_lat,
osd->logger->inc(l_osd_op_rw_inb, inb);
osd->logger->inc(l_osd_op_rw_outb, outb);
osd->logger->tinc(l_osd_op_rw_lat, latency);
+ osd->logger->hinc(l_osd_op_rw_lat_inb_hist, latency.to_nsec(), inb);
+ osd->logger->hinc(l_osd_op_rw_lat_outb_hist, latency.to_nsec(), outb);
osd->logger->tinc(l_osd_op_rw_process_lat, process_latency);
} else if (op->may_read()) {
osd->logger->inc(l_osd_op_r);
osd->logger->inc(l_osd_op_r_outb, outb);
osd->logger->tinc(l_osd_op_r_lat, latency);
+ osd->logger->hinc(l_osd_op_r_lat_outb_hist, latency.to_nsec(), outb);
osd->logger->tinc(l_osd_op_r_process_lat, process_latency);
} else if (op->may_write() || op->may_cache()) {
osd->logger->inc(l_osd_op_w);
osd->logger->inc(l_osd_op_w_inb, inb);
osd->logger->tinc(l_osd_op_w_lat, latency);
+ osd->logger->hinc(l_osd_op_w_lat_inb_hist, latency.to_nsec(), inb);
osd->logger->tinc(l_osd_op_w_process_lat, process_latency);
} else
ceph_abort();
add_ceph_unittest(unittest_shunique_lock ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_shunique_lock)
target_link_libraries(unittest_shunique_lock global ${BLKID_LIBRARIES} ${EXTRALIBS})
+# unittest_perf_histogram: unit tests for common/perf_histogram.h
+add_executable(unittest_perf_histogram
+  test_perf_histogram.cc
+  )
+add_ceph_unittest(unittest_perf_histogram ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_perf_histogram)
+target_link_libraries(unittest_perf_histogram global ${BLKID_LIBRARIES} ${EXTRALIBS})
+
# unittest_global_doublefree
if(WITH_CEPHFS)
add_executable(unittest_global_doublefree
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 OVH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/perf_histogram.h"
+
+#include "gtest/gtest.h"
+
+// Test helper: subclass that re-exports the protected / static internals of
+// PerfHistogram so the tests below can exercise them directly.
+template <int DIM>
+class PerfHistogramAccessor : public PerfHistogram<DIM> {
+public:
+  typedef PerfHistogram<DIM> Base;
+
+  using Base::PerfHistogram;
+
+  // Expose PerfHistogramCommon::get_bucket_for_axis
+  static int64_t get_bucket_for_axis(
+      int64_t value, const PerfHistogramCommon::axis_config_d& axis_config) {
+    return Base::get_bucket_for_axis(value, axis_config);
+  }
+
+  // Expose PerfHistogramCommon::get_axis_bucket_ranges
+  static std::vector<std::pair<int64_t, int64_t>> get_axis_bucket_ranges(
+      const PerfHistogramCommon::axis_config_d& axis_config) {
+    return Base::get_axis_bucket_ranges(axis_config);
+  }
+
+  // Expose the protected counter traversal
+  template <typename F1, typename F2, typename F3>
+  void visit_values(F1 f1, F2 f2, F3 f3) {
+    Base::visit_values(f1, f2, f3);
+  }
+};
+
+// Bucket lookup on both scale types.
+// Linear axis (min 100, unit 3, 4 buckets): bucket 0 is <100, then
+// 100-102, 103-105, and >=106 in the last bucket.
+// Log2 axis (min 100, unit 3, 5 buckets): bucket widths double:
+// 100-102, 103-105, 106-111, and >=112 in the last bucket.
+TEST(PerfHistogram, GetBucketForAxis) {
+  PerfHistogramCommon::axis_config_d linear{
+      "", PerfHistogramCommon::SCALE_LINEAR, 100, 3, 4};
+
+  ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(-1, linear));
+  ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(0, linear));
+  ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(99, linear));
+  ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(100, linear));
+  ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(101, linear));
+  ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(102, linear));
+  ASSERT_EQ(2, PerfHistogramAccessor<1>::get_bucket_for_axis(103, linear));
+  ASSERT_EQ(2, PerfHistogramAccessor<1>::get_bucket_for_axis(105, linear));
+  ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(106, linear));
+  ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(108, linear));
+  ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(109, linear));
+
+  // Extremes must clamp to the first / last bucket.
+  ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(
+                   std::numeric_limits<int64_t>::min(), linear));
+  ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(
+                   std::numeric_limits<int64_t>::max(), linear));
+
+  PerfHistogramCommon::axis_config_d logarithmic{
+      "", PerfHistogramCommon::SCALE_LOG2, 100, 3, 5};
+
+  ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(-1, logarithmic));
+  ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(0, logarithmic));
+  ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(99, logarithmic));
+  ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(100, logarithmic));
+  ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(101, logarithmic));
+  ASSERT_EQ(1, PerfHistogramAccessor<1>::get_bucket_for_axis(102, logarithmic));
+  ASSERT_EQ(2, PerfHistogramAccessor<1>::get_bucket_for_axis(103, logarithmic));
+  ASSERT_EQ(2, PerfHistogramAccessor<1>::get_bucket_for_axis(105, logarithmic));
+  ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(106, logarithmic));
+  ASSERT_EQ(3, PerfHistogramAccessor<1>::get_bucket_for_axis(111, logarithmic));
+  ASSERT_EQ(4, PerfHistogramAccessor<1>::get_bucket_for_axis(112, logarithmic));
+  ASSERT_EQ(4, PerfHistogramAccessor<1>::get_bucket_for_axis(124, logarithmic));
+
+  // Extremes must clamp to the first / last bucket.
+  ASSERT_EQ(0, PerfHistogramAccessor<1>::get_bucket_for_axis(
+                   std::numeric_limits<int64_t>::min(), logarithmic));
+  ASSERT_EQ(4, PerfHistogramAccessor<1>::get_bucket_for_axis(
+                   std::numeric_limits<int64_t>::max(), logarithmic));
+}
+
+// Axis sizes shared by the 2-D histogram tests below.
+static const int XS = 5;
+static const int YS = 7;
+
+// x: linear scale, unit 1, starting at 0; y: log2 scale, unit 1, starting at 0.
+static const auto x_axis = PerfHistogramCommon::axis_config_d{
+    "x", PerfHistogramCommon::SCALE_LINEAR, 0, 1, XS};
+static const auto y_axis = PerfHistogramCommon::axis_config_d{
+    "y", PerfHistogramCommon::SCALE_LOG2, 0, 1, YS};
+
+// A freshly constructed histogram must have every bucket equal to zero.
+TEST(PerfHistogram, ZeroedInitially) {
+  PerfHistogramAccessor<2> h{x_axis, y_axis};
+  for (int x = 0; x < XS; ++x) {
+    for (int y = 0; y < YS; ++y) {
+      ASSERT_EQ(0, h.read_bucket(x, y));
+    }
+  }
+}
+
+// Single increments land in the expected buckets. Bucket 0 on each axis is
+// reserved for values below the axis minimum, so value 0 maps to bucket 1,
+// and on the log2 y axis values 2 and 3 share bucket 3.
+TEST(PerfHistogram, SimpleValues) {
+  PerfHistogramAccessor<2> h{x_axis, y_axis};
+  ASSERT_EQ(0, h.read_bucket(1, 1));
+  h.inc(0, 0);
+  ASSERT_EQ(1, h.read_bucket(1, 1));
+
+  ASSERT_EQ(0, h.read_bucket(2, 2));
+  h.inc(1, 1);
+  ASSERT_EQ(1, h.read_bucket(2, 2));
+
+  ASSERT_EQ(0, h.read_bucket(3, 3));
+  h.inc(2, 2);
+  ASSERT_EQ(1, h.read_bucket(3, 3));
+
+  ASSERT_EQ(0, h.read_bucket(4, 3));
+  h.inc(3, 3);
+  ASSERT_EQ(1, h.read_bucket(4, 3));
+}
+
+// A single-bucket axis must cover the entire int64_t domain.
+TEST(PerfHistogram, OneBucketRange) {
+  auto ranges = PerfHistogramAccessor<1>::get_axis_bucket_ranges(
+      PerfHistogramCommon::axis_config_d{"", PerfHistogramCommon::SCALE_LINEAR,
+                                         0, 1, 1});
+
+  ASSERT_EQ(1, ranges.size());
+  ASSERT_EQ(std::numeric_limits<int64_t>::min(), ranges[0].first);
+  ASSERT_EQ(std::numeric_limits<int64_t>::max(), ranges[0].second);
+}
+
+// With two buckets the split falls at the axis minimum: everything below it
+// goes to bucket 0, everything else to bucket 1.
+TEST(PerfHistogram, TwoBucketRange) {
+  auto ranges = PerfHistogramAccessor<1>::get_axis_bucket_ranges(
+      PerfHistogramCommon::axis_config_d{"", PerfHistogramCommon::SCALE_LINEAR,
+                                         0, 1, 2});
+
+  ASSERT_EQ(2, ranges.size());
+  ASSERT_EQ(std::numeric_limits<int64_t>::min(), ranges[0].first);
+  ASSERT_EQ(-1, ranges[0].second);
+  ASSERT_EQ(0, ranges[1].first);
+  ASSERT_EQ(std::numeric_limits<int64_t>::max(), ranges[1].second);
+}
+
+// Both endpoints of each reported linear-axis range must map back to that
+// bucket, and consecutive ranges must tile the axis without gaps or overlaps.
+TEST(PerfHistogram, LinearBucketRange) {
+  PerfHistogramCommon::axis_config_d ac{"", PerfHistogramCommon::SCALE_LINEAR,
+                                        100, 10, 15};
+  auto ranges = PerfHistogramAccessor<1>::get_axis_bucket_ranges(ac);
+
+  for (size_t i = 0; i < ranges.size(); ++i) {
+    ASSERT_EQ(
+        i, PerfHistogramAccessor<1>::get_bucket_for_axis(ranges[i].first, ac));
+    ASSERT_EQ(
+        i, PerfHistogramAccessor<1>::get_bucket_for_axis(ranges[i].second, ac));
+  }
+
+  for (size_t i = 1; i < ranges.size(); ++i) {
+    ASSERT_EQ(ranges[i].first, ranges[i - 1].second + 1);
+  }
+}
+
+TEST(PerfHistogram, LogarithmicBucketRange) {
+  PerfHistogramCommon::axis_config_d ac{"", PerfHistogramCommon::SCALE_LOG2,
+                                        100, 10, 15};
+  auto ranges = PerfHistogramAccessor<1>::get_axis_bucket_ranges(ac);
+
+  // Same invariants as the linear case, but with log2-sized buckets:
+  // endpoints round-trip through get_bucket_for_axis and the ranges
+  // are contiguous.
+  for (size_t i = 0; i < ranges.size(); ++i) {
+    ASSERT_EQ(
+        i, PerfHistogramAccessor<1>::get_bucket_for_axis(ranges[i].first, ac));
+    ASSERT_EQ(
+        i, PerfHistogramAccessor<1>::get_bucket_for_axis(ranges[i].second, ac));
+    if (i > 0) {
+      ASSERT_EQ(ranges[i].first, ranges[i - 1].second + 1);
+    }
+  }
+}
+
+TEST(PerfHistogram, AxisAddressing) {
+  PerfHistogramCommon::axis_config_d ac1{"", PerfHistogramCommon::SCALE_LINEAR,
+                                         0, 1, 7};
+  PerfHistogramCommon::axis_config_d ac2{"", PerfHistogramCommon::SCALE_LINEAR,
+                                         0, 1, 9};
+  PerfHistogramCommon::axis_config_d ac3{"", PerfHistogramCommon::SCALE_LINEAR,
+                                         0, 1, 11};
+
+  PerfHistogramAccessor<3> h{ac1, ac2, ac3};
+
+  h.inc(1, 2, 3);         // values land in buckets (2, 3, 4)
+  h.inc_bucket(4, 5, 6);  // direct bucket addressing
+
+  std::vector<int64_t> flat;
+  h.visit_values([](int) {},
+                 [&flat](int64_t value) { flat.push_back(value); },
+                 [](int) {});
+
+  // Buckets are flattened row-major, so the innermost axis (size 11)
+  // varies fastest.  Exactly the two incremented buckets hold a count.
+  const size_t hit_a = 4 + 11 * (3 + 9 * 2);
+  const size_t hit_b = 6 + 11 * (5 + 9 * 4);
+  for (size_t i = 0; i < flat.size(); ++i) {
+    if (i == hit_a || i == hit_b) {
+      ASSERT_EQ(1, flat[i]);
+    } else {
+      ASSERT_EQ(0, flat[i]);
+    }
+  }
+}
--- /dev/null
+#!/usr/bin/env python
+# coding: utf-8
+#
+# Ceph - scalable distributed file system
+#
+# Copyright (C) 2017 OVH
+#
+# This is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public
+# License version 2, as published by the Free Software
+# Foundation. See file COPYING.
+#
+
+import json
+import subprocess
+import time
+import os
+import argparse
+
+
+def shorten(val):
+ if isinstance(val, str):
+ return val
+ for u in ((3, ''), (6, 'k'), (9, 'M'), (12, 'G'), (15, 'T')):
+ if val < 10**u[0]:
+ return "{}{}".format(int(val / (10 ** (u[0]-3))), u[1])
+ return val
+
+
+def print_histogram(asok, logger, counter, last):
+
+ try:
+ out = subprocess.check_output(
+ "ceph --admin-daemon {} perf histogram dump".format(asok),
+ shell=True)
+ j = json.loads(out.decode('utf-8'))
+ except Exception as e:
+ return (last,
+ "Couldn't connect to admin socket, result: \n{}".format(e))
+
+ current = j['osd'][counter]['values']
+ axes = j['osd'][counter]['axes']
+ content = ""
+
+ content += "{}:\n".format(axes[1]['name'])
+ for r in axes[1]['ranges']:
+ content += "{0: >4} ".format(
+ shorten(r['min']) if 'min' in r else '')
+ content += "\n"
+ for r in axes[1]['ranges']:
+ content += "{0: >4} ".format(
+ shorten(r['max']) if 'max' in r else '')
+ content += "\n"
+
+ content += ("{0: >"+str(len(axes[1]['ranges'])*5+14)+"}:\n").format(
+ axes[0]['name'])
+
+ for i in range(len(current)):
+ for j in range(len(current[i])):
+ try:
+ diff = current[i][j] - last[i][j]
+ except IndexError:
+ diff = '-'
+ content += "{0: >4} ".format(shorten(diff))
+
+ r = axes[0]['ranges'][i]
+ content += "{0: >6} : {1}\n".format(
+ shorten(r['min']) if 'min' in r else '',
+ shorten(r['max']) if 'max' in r else '')
+ return (current, content)
+
+
+def loop_print(asok, logger, counter):
+ last = []
+ while True:
+
+ last, content = print_histogram(asok, logger, counter, last)
+ print("{}{}".format("\n"*100, content))
+ time.sleep(1)
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description='Continuously display ceph performance histogram')
+ parser.add_argument(
+ '--asok',
+ type=str,
+ default='/var/run/ceph/*.asok',
+ help='Path to asok file, can use wildcards')
+ parser.add_argument(
+ '--logger',
+ type=str,
+ default='osd')
+ parser.add_argument(
+ '--counter',
+ type=str,
+ default='op_w_latency_in_bytes_histogram')
+ args = parser.parse_args()
+
+ loop_print(args.asok, args.logger, args.counter)
+
+
+# Allow use both as a standalone script and as an importable module.
+if __name__ == '__main__':
+    main()