virtual seastar::future<store_statfs_t> pool_statfs(int64_t pool_id) const = 0;
+ virtual seastar::future<> report_stats() { return seastar::now(); }
+
virtual uuid_d get_fsid() const = 0;
virtual seastar::future<> write_meta(const std::string& key,
ceph_assert(devices_by_id[device->get_device_id()] == device);
}
+device_stats_t
+ExtentPlacementManager::get_device_stats(
+  const writer_stats_t &journal_stats,
+  bool report_detail) const
+{
+  LOG_PREFIX(ExtentPlacementManager::get_device_stats);
+
+  /*
+   * RecordSubmitter::get_stats() isn't reentrant.
+   * And refer to EPM::init() for the writers.
+   */
+
+  // Aggregate the journal (inline) stats with the per-writer stats of the
+  // main tier.  Each writer's get_stats() returns the delta since its
+  // previous call, so this function must be its only caller (see above).
+  writer_stats_t main_stats = journal_stats;
+  std::vector<writer_stats_t> main_writer_stats;
+  using enum data_category_t;
+  if (get_main_backend_type() == backend_type_t::SEGMENTED) {
+    // Fixed layout relied upon below:
+    // [0]=oolmdat, [1]=ooldata, [2]=mainmdat, [3]=maindata
+    // 0. oolmdat
+    main_writer_stats.emplace_back(
+      get_writer(METADATA, OOL_GENERATION)->get_stats());
+    main_stats.add(main_writer_stats.back());
+    // 1. ooldata
+    main_writer_stats.emplace_back(
+      get_writer(DATA, OOL_GENERATION)->get_stats());
+    main_stats.add(main_writer_stats.back());
+    // 2. mainmdat
+    main_writer_stats.emplace_back();
+    for (rewrite_gen_t gen = MIN_REWRITE_GENERATION; gen < MIN_COLD_GENERATION; ++gen) {
+      const auto &writer = get_writer(METADATA, gen);
+      ceph_assert(writer->get_type() == backend_type_t::SEGMENTED);
+      main_writer_stats.back().add(writer->get_stats());
+    }
+    main_stats.add(main_writer_stats.back());
+    // 3. maindata
+    main_writer_stats.emplace_back();
+    for (rewrite_gen_t gen = MIN_REWRITE_GENERATION; gen < MIN_COLD_GENERATION; ++gen) {
+      const auto &writer = get_writer(DATA, gen);
+      ceph_assert(writer->get_type() == backend_type_t::SEGMENTED);
+      main_writer_stats.back().add(writer->get_stats());
+    }
+    main_stats.add(main_writer_stats.back());
+  } else { // RBM
+    // TODO stats from RandomBlockOolWriter
+  }
+
+  // Cold-tier writers, if configured: [0]=coldmdat, [1]=colddata
+  writer_stats_t cold_stats = {};
+  std::vector<writer_stats_t> cold_writer_stats;
+  bool has_cold_tier = background_process.has_cold_tier();
+  if (has_cold_tier) {
+    // 0. coldmdat
+    cold_writer_stats.emplace_back();
+    for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+      const auto &writer = get_writer(METADATA, gen);
+      ceph_assert(writer->get_type() == backend_type_t::SEGMENTED);
+      cold_writer_stats.back().add(writer->get_stats());
+    }
+    cold_stats.add(cold_writer_stats.back());
+    // 1. colddata
+    cold_writer_stats.emplace_back();
+    for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+      const auto &writer = get_writer(DATA, gen);
+      ceph_assert(writer->get_type() == backend_type_t::SEGMENTED);
+      cold_writer_stats.back().add(writer->get_stats());
+    }
+    cold_stats.add(cold_writer_stats.back());
+  }
+
+  // The first call only establishes the baseline: the get_stats() calls
+  // above have reset each writer's interval, so report nothing yet.
+  auto now = seastar::lowres_clock::now();
+  if (last_tp == seastar::lowres_clock::time_point::min()) {
+    last_tp = now;
+    return {};
+  }
+  std::chrono::duration<double> duration_d = now - last_tp;
+  double seconds = duration_d.count();
+  last_tp = now;
+
+  if (report_detail) {
+    std::ostringstream oss;
+    auto report_writer_stats = [seconds, &oss](
+        const char* name,
+        const writer_stats_t& stats) {
+      oss << "\n" << name << ": " << writer_stats_printer_t{seconds, stats};
+    };
+    report_writer_stats("tier-main", main_stats);
+    report_writer_stats(" inline", journal_stats);
+    if (get_main_backend_type() == backend_type_t::SEGMENTED) {
+      report_writer_stats(" oolmdat", main_writer_stats[0]);
+      report_writer_stats(" ooldata", main_writer_stats[1]);
+      report_writer_stats(" mainmdat", main_writer_stats[2]);
+      report_writer_stats(" maindata", main_writer_stats[3]);
+    } else { // RBM
+      // TODO stats from RandomBlockOolWriter
+    }
+    if (has_cold_tier) {
+      report_writer_stats("tier-cold", cold_stats);
+      report_writer_stats(" coldmdat", cold_writer_stats[0]);
+      report_writer_stats(" colddata", cold_writer_stats[1]);
+    }
+
+    // Break the interval down by transaction source across all writers.
+    auto report_by_src = [seconds, has_cold_tier, &oss,
+                          &journal_stats,
+                          &main_writer_stats,
+                          &cold_writer_stats](transaction_type_t src) {
+      auto t_stats = get_by_src(journal_stats.stats_by_src, src);
+      for (const auto &writer_stats : main_writer_stats) {
+        t_stats += get_by_src(writer_stats.stats_by_src, src);
+      }
+      for (const auto &writer_stats : cold_writer_stats) {
+        t_stats += get_by_src(writer_stats.stats_by_src, src);
+      }
+      if (src == transaction_type_t::READ) {
+        ceph_assert(t_stats.is_empty());
+        return;
+      }
+      oss << "\n" << src << ": "
+          << tw_stats_printer_t{seconds, t_stats};
+
+      auto report_tw_stats = [seconds, src, &oss](
+          const char* name,
+          const writer_stats_t& stats) {
+        const auto& tw_stats = get_by_src(stats.stats_by_src, src);
+        if (tw_stats.is_empty()) {
+          return;
+        }
+        oss << "\n " << name << ": "
+            << tw_stats_printer_t{seconds, tw_stats};
+      };
+      report_tw_stats("inline", journal_stats);
+      // main_writer_stats is only populated (with exactly 4 entries) when
+      // the main backend is SEGMENTED; under RBM the vector is empty and
+      // indexing it unconditionally would be undefined behavior.
+      if (main_writer_stats.size() >= 4) {
+        report_tw_stats("oolmdat", main_writer_stats[0]);
+        report_tw_stats("ooldata", main_writer_stats[1]);
+        report_tw_stats("mainmdat", main_writer_stats[2]);
+        report_tw_stats("maindata", main_writer_stats[3]);
+      }
+      if (has_cold_tier) {
+        report_tw_stats("coldmdat", cold_writer_stats[0]);
+        report_tw_stats("colddata", cold_writer_stats[1]);
+      }
+    };
+    for (uint8_t _src=0; _src<TRANSACTION_TYPE_MAX; ++_src) {
+      auto src = static_cast<transaction_type_t>(_src);
+      report_by_src(src);
+    }
+
+    INFO("{}", oss.str());
+  }
+
+  // The returned device-level stats cover both the main and cold tiers.
+  main_stats.add(cold_stats);
+  return {main_stats.io_depth_stats.num_io,
+          main_stats.io_depth_stats.num_io_grouped,
+          main_stats.get_total_bytes()};
+}
+
ExtentPlacementManager::open_ertr::future<>
ExtentPlacementManager::open_for_write()
{
#pragma once
-#include "seastar/core/gate.hh"
+#include <seastar/core/gate.hh>
+#include <seastar/core/lowres_clock.hh>
#include "crimson/os/seastore/async_cleaner.h"
#include "crimson/os/seastore/cached_extent.h"
public:
virtual ~ExtentOolWriter() {}
+ virtual backend_type_t get_type() const = 0;
+
+ virtual writer_stats_t get_stats() const = 0;
+
using open_ertr = base_ertr;
virtual open_ertr::future<> open() = 0;
SegmentProvider &sp,
SegmentSeqAllocator &ssa);
+  // This writer always targets the segmented backend.
+  backend_type_t get_type() const final {
+    return backend_type_t::SEGMENTED;
+  }
+
+  // Interval stats since the previous call, forwarded from the underlying
+  // RecordSubmitter (see RecordSubmitter::get_stats(); not reentrant).
+  writer_stats_t get_stats() const final {
+    return record_submitter.get_stats();
+  }
+
open_ertr::future<> open() final {
return record_submitter.open(false).discard_result();
}
RandomBlockOolWriter(RBMCleaner* rb_cleaner) :
rb_cleaner(rb_cleaner) {}
+  // This writer always targets the random-block backend.
+  backend_type_t get_type() const final {
+    return backend_type_t::RANDOM_BLOCK;
+  }
+
+  // RBM writer stats are not collected yet; return an empty (zeroed)
+  // writer_stats_t so aggregation in EPM::get_device_stats() still works.
+  writer_stats_t get_stats() const final {
+    // TODO: collect stats
+    return {};
+  }
+
using open_ertr = ExtentOolWriter::open_ertr;
open_ertr::future<> open() final {
return open_ertr::now();
return background_process.get_stat();
}
+ device_stats_t get_device_stats(
+ const writer_stats_t &journal_stats,
+ bool report_detail) const;
+
using mount_ertr = crimson::errorator<
crimson::ct_error::input_output_error>;
using mount_ret = mount_ertr::future<>;
rewrite_gen_t gen) {
assert(hint < placement_hint_t::NUM_HINTS);
// TODO: might worth considering the hint
+ return get_writer(category, gen);
+ }
+
+  // Look up the out-of-line writer for a (category, generation) pair.
+  // The generation must be a valid rewrite generation (not inline, and
+  // within the configured dynamic maximum); the writer arrays are
+  // populated during init, so the result is never null.
+  ExtentOolWriter* get_writer(data_category_t category,
+                              rewrite_gen_t gen) {
+    assert(is_rewrite_generation(gen));
+    assert(gen != INLINE_GENERATION);
+    assert(gen <= dynamic_max_rewrite_generation);
+    ExtentOolWriter* ret = nullptr;
+    if (category == data_category_t::DATA) {
+      ret = data_writers_by_gen[generation_to_writer(gen)];
+    } else {
+      assert(category == data_category_t::METADATA);
+      ret = md_writers_by_gen[generation_to_writer(gen)];
+    }
+    assert(ret != nullptr);
+    return ret;
+  }
+
+ const ExtentOolWriter* get_writer(data_category_t category,
+ rewrite_gen_t gen) const {
assert(is_rewrite_generation(gen));
assert(gen != INLINE_GENERATION);
assert(gen <= dynamic_max_rewrite_generation);
SegmentSeqAllocatorRef ool_segment_seq_allocator;
extent_len_t max_data_allocation_size = 0;
+ mutable seastar::lowres_clock::time_point last_tp =
+ seastar::lowres_clock::time_point::min();
+
friend class ::transaction_manager_test_t;
};
class Journal {
public:
virtual JournalTrimmer &get_trimmer() = 0;
+
+ virtual writer_stats_t get_writer_stats() const = 0;
+
/**
* initializes journal for mkfs writes -- must run prior to calls
* to submit_record.
return trimmer;
}
+  // Journal writer stats since the previous call, forwarded from the
+  // underlying RecordSubmitter (not reentrant).
+  writer_stats_t get_writer_stats() const final {
+    return record_submitter.get_stats();
+  }
+
open_for_mkfs_ret open_for_mkfs() final;
open_for_mount_ret open_for_mount() final;
return ret;
}
+writer_stats_t RecordSubmitter::get_stats() const
+{
+  // Report only the delta accumulated since the previous snapshot, then
+  // advance the snapshot.  last_stats is mutable so this stays const.
+  const writer_stats_t prev = last_stats;
+  last_stats = stats;
+  writer_stats_t delta = stats;
+  delta.minus(prev);
+  return delta;
+}
+
RecordSubmitter::wa_ertr::future<>
RecordSubmitter::wait_available()
{
LOG_PREFIX(RecordSubmitter::open);
DEBUG("{} register metrics", get_name());
stats = {};
+ last_stats = {};
namespace sm = seastar::metrics;
std::vector<sm::label_instance> label_instances;
label_instances.push_back(sm::label_instance("submitter", get_name()));
// whether is available to submit a record
bool is_available() const;
+ // get the stats since last_stats
+ writer_stats_t get_stats() const;
+
// wait for available if cannot submit, should check is_available() again
// when the future is resolved.
using wa_ertr = base_ertr;
std::optional<seastar::promise<> > wait_unfull_flush_promise;
writer_stats_t stats;
+ mutable writer_stats_t last_stats;
seastar::metrics::metric_group metrics;
};
return trimmer;
}
+  // Journal writer stats since the previous call, forwarded from the
+  // underlying RecordSubmitter (not reentrant).
+  writer_stats_t get_writer_stats() const final {
+    return record_submitter.get_stats();
+  }
+
open_for_mkfs_ret open_for_mkfs() final;
open_for_mount_ret open_for_mount() final;
return SeaStore::stat();
}
+seastar::future<> SeaStore::report_stats()
+{
+  // Runs on the primary core only: it owns last_tp/shard_device_stats and
+  // aggregates the per-shard device stats collected below.
+  ceph_assert(seastar::this_shard_id() == primary_core);
+  shard_device_stats.resize(seastar::smp::count);
+  return shard_stores.invoke_on_all([this](const Shard &local_store) {
+    bool report_detail = false;
+    if (seastar::this_shard_id() == 0) {
+      // avoid too verbose logs, only report detail in a particular shard
+      report_detail = true;
+    }
+    // Each shard writes only its own slot, so no extra synchronization.
+    shard_device_stats[seastar::this_shard_id()] =
+      local_store.get_device_stats(report_detail);
+  }).then([this] {
+    LOG_PREFIX(SeaStore);
+    auto now = seastar::lowres_clock::now();
+    if (last_tp == seastar::lowres_clock::time_point::min()) {
+      // First invocation just establishes the baseline timestamp.
+      last_tp = now;
+      return seastar::now();
+    }
+    std::chrono::duration<double> duration_d = now - last_tp;
+    double seconds = duration_d.count();
+    last_tp = now;
+    // Totals across all shards for the elapsed interval.
+    device_stats_t ts = {};
+    for (const auto &s : shard_device_stats) {
+      ts.add(s);
+    }
+    constexpr const char* dfmt = "{:.2f}";
+    // NOTE(review): when num_io is 0 the per-IO averages below divide by
+    // zero and format as inf/nan -- presumably acceptable for a log line;
+    // confirm if these logs are machine-parsed.
+    auto d_ts_num_io = static_cast<double>(ts.num_io);
+    // Four parallel log lines: total followed by "(per-shard,...)" values.
+    std::ostringstream oss_iops;
+    oss_iops << "device IOPS:"
+             << fmt::format(dfmt, ts.num_io/seconds)
+             << "(";
+    std::ostringstream oss_depth;
+    oss_depth << "device per-writer depth:"
+              << fmt::format(dfmt, ts.total_depth/d_ts_num_io)
+              << "(";
+    std::ostringstream oss_bd;
+    oss_bd << "device bandwidth(MiB):"
+           << fmt::format(dfmt, ts.total_bytes/seconds/(1<<20))
+           << "(";
+    std::ostringstream oss_iosz;
+    oss_iosz << "device IO size(B):"
+             << fmt::format(dfmt, ts.total_bytes/d_ts_num_io)
+             << "(";
+    for (const auto &s : shard_device_stats) {
+      auto d_s_num_io = static_cast<double>(s.num_io);
+      oss_iops << fmt::format(dfmt, s.num_io/seconds) << ",";
+      oss_depth << fmt::format(dfmt, s.total_depth/d_s_num_io) << ",";
+      oss_bd << fmt::format(dfmt, s.total_bytes/seconds/(1<<20)) << ",";
+      oss_iosz << fmt::format(dfmt, s.total_bytes/d_s_num_io) << ",";
+    }
+    oss_iops << ")";
+    oss_depth << ")";
+    oss_bd << ")";
+    oss_iosz << ")";
+    INFO("{}", oss_iops.str());
+    INFO("{}", oss_depth.str());
+    INFO("{}", oss_bd.str());
+    INFO("{}", oss_iosz.str());
+    return seastar::now();
+  });
+}
+
TransactionManager::read_extent_iertr::future<std::optional<unsigned>>
SeaStore::Shard::get_coll_bits(CollectionRef ch, Transaction &t) const
{
*transaction_manager);
}
+// Per-shard device stats for the elapsed interval; delegates to the
+// shard's TransactionManager (journal + EPM writers).
+device_stats_t SeaStore::Shard::get_device_stats(bool report_detail) const
+{
+  return transaction_manager->get_device_stats(report_detail);
+}
+
std::unique_ptr<SeaStore> make_seastore(
const std::string &device)
{
#pragma once
-#include <string>
-#include <unordered_map>
#include <map>
+#include <optional>
+#include <string>
#include <typeinfo>
+#include <unordered_map>
#include <vector>
-#include <optional>
#include <seastar/core/future.hh>
+#include <seastar/core/lowres_clock.hh>
#include <seastar/core/metrics_types.hh>
#include "include/uuid.h"
void init_managers();
+ device_stats_t get_device_stats(bool report_detail) const;
+
private:
struct internal_context_t {
CollectionRef ch;
seastar::future<store_statfs_t> stat() const final;
seastar::future<store_statfs_t> pool_statfs(int64_t pool_id) const final;
+ seastar::future<> report_stats() final;
+
uuid_d get_fsid() const final {
ceph_assert(seastar::this_shard_id() == primary_core);
return shard_stores.local().get_fsid();
DeviceRef device;
std::vector<DeviceRef> secondaries;
seastar::sharded<SeaStore::Shard> shard_stores;
+
+ mutable seastar::lowres_clock::time_point last_tp =
+ seastar::lowres_clock::time_point::min();
+ mutable std::vector<device_stats_t> shard_device_stats;
};
std::unique_ptr<SeaStore> make_seastore(
<< ")";
}
+// Print per-transaction-source writer rates normalized over p.seconds:
+// records/s, bandwidth in MiB/s, and average record size split as
+// (data,metadata) bytes per record.
+// NOTE(review): divides by num_records -- callers appear to guard with
+// is_empty() first; a zero count would format as inf/nan.
+std::ostream& operator<<(std::ostream& out, const tw_stats_printer_t& p)
+{
+  constexpr const char* dfmt = "{:.2f}";
+  double d_num_records = static_cast<double>(p.stats.num_records);
+  out << "rps="
+      << fmt::format(dfmt, d_num_records/p.seconds)
+      << ",bwMiB="
+      << fmt::format(dfmt, p.stats.get_total_bytes()/p.seconds/(1<<20))
+      << ",sizeB="
+      << fmt::format(dfmt, p.stats.get_total_bytes()/d_num_records)
+      << "("
+      << fmt::format(dfmt, p.stats.data_bytes/d_num_records)
+      << ","
+      << fmt::format(dfmt, p.stats.metadata_bytes/d_num_records)
+      << ")";
+  return out;
+}
+
+// Print writer-level rates normalized over p.seconds: IOPS, average IO
+// depth, average records per batch, bandwidth in MiB/s, and average IO
+// size split as (data,metadata,padding) bytes per IO.
+// NOTE(review): divides by num_io -- a zero count formats as inf/nan.
+std::ostream& operator<<(std::ostream& out, const writer_stats_printer_t& p)
+{
+  constexpr const char* dfmt = "{:.2f}";
+  auto d_num_io = static_cast<double>(p.stats.io_depth_stats.num_io);
+  out << "iops="
+      << fmt::format(dfmt, d_num_io/p.seconds)
+      << ",depth="
+      << fmt::format(dfmt, p.stats.io_depth_stats.average())
+      << ",batch="
+      << fmt::format(dfmt, p.stats.record_batch_stats.average())
+      << ",bwMiB="
+      << fmt::format(dfmt, p.stats.get_total_bytes()/p.seconds/(1<<20))
+      << ",sizeB="
+      << fmt::format(dfmt, p.stats.get_total_bytes()/d_num_io)
+      << "("
+      << fmt::format(dfmt, p.stats.record_group_data_bytes/d_num_io)
+      << ","
+      << fmt::format(dfmt, p.stats.record_group_metadata_bytes/d_num_io)
+      << ","
+      << fmt::format(dfmt, p.stats.record_group_padding_bytes/d_num_io)
+      << ")";
+  return out;
+}
+
}
return counters_by_src[static_cast<std::size_t>(src)];
}
+// Element-wise accumulate the per-source counters of `by` into `base`.
+template <typename CounterT>
+void add_srcs(counter_by_src_t<CounterT>& base,
+              const counter_by_src_t<CounterT>& by) {
+  for (std::size_t slot = 0; slot != TRANSACTION_TYPE_MAX; ++slot) {
+    base[slot] += by[slot];
+  }
+}
+
+// Element-wise subtract the per-source counters of `by` from `base`.
+template <typename CounterT>
+void minus_srcs(counter_by_src_t<CounterT>& base,
+               const counter_by_src_t<CounterT>& by) {
+  for (std::size_t slot = 0; slot != TRANSACTION_TYPE_MAX; ++slot) {
+    base[slot] -= by[slot];
+  }
+}
+
struct grouped_io_stats {
  uint64_t num_io = 0;
  uint64_t num_io_grouped = 0;
+  // Average number of grouped operations per IO.
+  // NOTE(review): divides by num_io -- callers should check is_empty()
+  // first to avoid inf/nan when no IO has been recorded.
+  double average() const {
+    return static_cast<double>(num_io_grouped)/num_io;
+  }
+
+  bool is_empty() const {
+    return num_io == 0;
+  }
+
+  // Accumulate another sample set into this one.
+  void add(const grouped_io_stats &o) {
+    num_io += o.num_io;
+    num_io_grouped += o.num_io_grouped;
+  }
+
+  // Subtract a baseline, e.g. to compute an interval delta.
+  void minus(const grouped_io_stats &o) {
+    num_io -= o.num_io;
+    num_io_grouped -= o.num_io_grouped;
+  }
+
  void increment(uint64_t num_grouped_io) {
-    ++num_io;
-    num_io_grouped += num_grouped_io;
+    // Record one IO carrying num_grouped_io grouped operations.
+    add({1, num_grouped_io});
+  }
+};
+
+// Per-device IO summary over a reporting interval, aggregated across
+// writers (and across shards by SeaStore::report_stats()).
+struct device_stats_t {
+  uint64_t num_io = 0;      // number of device IOs
+  uint64_t total_depth = 0; // sum of per-IO queue depths
+  uint64_t total_bytes = 0; // total bytes written
+
+  void add(const device_stats_t& other) {
+    num_io += other.num_io;
+    total_depth += other.total_depth;
+    total_bytes += other.total_bytes;
+  }
uint64_t num_records = 0;
uint64_t metadata_bytes = 0;
uint64_t data_bytes = 0;
+
+  bool is_empty() const {
+    return num_records == 0;
+  }
+
+  uint64_t get_total_bytes() const {
+    return metadata_bytes + data_bytes;
+  }
+
+  // Accumulate another sample set into this one.
+  trans_writer_stats_t&
+  operator+=(const trans_writer_stats_t& o) {
+    num_records += o.num_records;
+    metadata_bytes += o.metadata_bytes;
+    data_bytes += o.data_bytes;
+    return *this;
+  }
+
+  // Subtract a baseline, e.g. to compute an interval delta.
+  trans_writer_stats_t&
+  operator-=(const trans_writer_stats_t& o) {
+    num_records -= o.num_records;
+    metadata_bytes -= o.metadata_bytes;
+    data_bytes -= o.data_bytes;
+    return *this;
+  }
};
+// Helper to print trans_writer_stats_t as rates normalized over a time
+// interval in seconds (operator<< defined in the .cc file).
+struct tw_stats_printer_t {
+  double seconds;
+  const trans_writer_stats_t &stats;
+};
+std::ostream& operator<<(std::ostream&, const tw_stats_printer_t&);
struct writer_stats_t {
  grouped_io_stats record_batch_stats;
  uint64_t record_group_metadata_bytes = 0;
  uint64_t record_group_data_bytes = 0;
  counter_by_src_t<trans_writer_stats_t> stats_by_src;
+
+  // An interval with no IOs is considered empty; the byte counters are
+  // only meaningful when IOs were recorded.
+  bool is_empty() const {
+    return io_depth_stats.is_empty();
+  }
+
+  uint64_t get_total_bytes() const {
+    return record_group_padding_bytes +
+           record_group_metadata_bytes +
+           record_group_data_bytes;
+  }
+
+  // Accumulate another writer's stats into this one (all counters and
+  // the per-source breakdown).
+  void add(const writer_stats_t &o) {
+    record_batch_stats.add(o.record_batch_stats);
+    io_depth_stats.add(o.io_depth_stats);
+    record_group_padding_bytes += o.record_group_padding_bytes;
+    record_group_metadata_bytes += o.record_group_metadata_bytes;
+    record_group_data_bytes += o.record_group_data_bytes;
+    add_srcs(stats_by_src, o.stats_by_src);
+  }
+
+  // Subtract a baseline snapshot, e.g. RecordSubmitter::last_stats, to
+  // produce an interval delta.
+  void minus(const writer_stats_t &o) {
+    record_batch_stats.minus(o.record_batch_stats);
+    io_depth_stats.minus(o.io_depth_stats);
+    record_group_padding_bytes -= o.record_group_padding_bytes;
+    record_group_metadata_bytes -= o.record_group_metadata_bytes;
+    record_group_data_bytes -= o.record_group_data_bytes;
+    minus_srcs(stats_by_src, o.stats_by_src);
+  }
+};
+// Helper to print writer_stats_t as rates normalized over a time
+// interval in seconds (operator<< defined in the .cc file).
+struct writer_stats_printer_t {
+  double seconds;
+  const writer_stats_t &stats;
};
+std::ostream& operator<<(std::ostream&, const writer_stats_printer_t&);
}
using close_ertr = base_ertr;
close_ertr::future<> close();
+  // Interval device stats combining the journal writer and the EPM
+  // out-of-line writers; report_detail additionally logs a per-writer
+  // and per-transaction-source breakdown.
+  device_stats_t get_device_stats(bool report_detail) const {
+    writer_stats_t journal_stats = journal->get_writer_stats();
+    return epm->get_device_stats(journal_stats, report_detail);
+  }
+
/// Resets transaction
void reset_transaction_preserve_handle(Transaction &t) {
return cache->reset_transaction_preserve_handle(t);
INFO("reactor_utilizations: {}", oss.str());
});
});
+ gate.dispatch_in_background("stats_store", *this, [this] {
+ return store.report_stats();
+ });
});
stats_timer.arm_periodic(std::chrono::seconds(stats_seconds));
}