Add a 'scrub scheduling info' column to pgs dump.
Rename 'scrub_duration' to 'last_scrub_duration', and report it in whole seconds (rounded up) instead of as a floating-point value.
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
Co-Authored-By: Aishwarya Mathuria <amathuri@redhat.com>
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#pragma once
+/**
+ * \file
+ * \brief fmtlib formatter for utime_t
+ */
+#include <fmt/format.h>
+#include <fmt/chrono.h>
+
+#include <string_view>
+
+#include "include/utime.h"
+
+/**
+ * fmtlib formatter for utime_t.
+ *
+ * A value whose seconds field is below ten (365-day) years is printed as a
+ * relative time, i.e. as raw "<seconds>.<microseconds>". Any other value is
+ * printed as an absolute UTC timestamp in ISO 8601 form.
+ */
+template <>
+struct fmt::formatter<utime_t> {
+  /// No format-spec options are supported; the spec is left unexamined.
+  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+
+  /**
+   * Write the textual form of \a utime into the output of \a ctx.
+   *
+   * Declared const: the formatter holds no state, and recent {fmt} releases
+   * invoke format() through a const formatter object.
+   *
+   * \return the output iterator, positioned after the last character written
+   */
+  template <typename FormatContext>
+  auto format(const utime_t& utime, FormatContext& ctx) const
+  {
+    // the number of seconds in ten 365-day years
+    constexpr time_t relative_time_limit = 60 * 60 * 24 * 365 * 10;
+
+    if (utime.sec() < relative_time_limit) {
+      // raw seconds. this looks like a relative time.
+      return fmt::format_to(ctx.out(), "{}.{:06}",
+                            static_cast<long>(utime.sec()), utime.usec());
+    }
+
+    // this looks like an absolute time.
+    // conform to http://en.wikipedia.org/wiki/ISO_8601
+    const auto asgmt = fmt::gmtime(utime.sec());
+    return fmt::format_to(ctx.out(), "{:%FT%T}.{:06}{:%z}", asgmt,
+                          utime.usec(), asgmt);
+  }
+};
tab.define_column("LAST_DEEP_SCRUB", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SNAPTRIMQ_LEN", TextTable::LEFT, TextTable::RIGHT);
- tab.define_column("SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("LAST_SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("SCRUB_SCHEDULING", TextTable::LEFT, TextTable::LEFT);
}
for (auto i = pg_stats.begin();
tab.define_column("ACTING", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
- tab.define_column("SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("LAST_SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("SCRUB_SCHEDULING", TextTable::LEFT, TextTable::LEFT);
for (auto i = pgs.begin(); i != pgs.end(); ++i) {
const pg_stat_t& st = pg_stat.at(*i);
<< actingstr.str()
<< st.last_scrub_stamp
<< st.last_deep_scrub_stamp
- << st.scrub_duration
- << TextTable::endrow;
+ << st.last_scrub_duration
+ << st.dump_scrub_schedule()
+ << TextTable::endrow;
}
ss << tab;
if (!is_primary())
return;
+ if (m_scrubber) {
+ recovery_state.update_stats_wo_resched(
+ [scrubber = m_scrubber.get()](pg_history_t& hist,
+ pg_stat_t& info) mutable -> void {
+ info.scrub_sched_status = scrubber->get_schedule();
+ });
+ }
+
std::lock_guard l{pg_stats_publish_lock};
- auto stats = recovery_state.prepare_stats_for_publish(
- pg_stats_publish,
- unstable_stats);
+ auto stats =
+ recovery_state.prepare_stats_for_publish(pg_stats_publish, unstable_stats);
if (stats) {
pg_stats_publish = std::move(stats);
}
dout(10) << "handle_query_state" << dendl;
PeeringState::QueryState q(f);
recovery_state.handle_event(q, 0);
-
- // This code has moved to after the close of recovery_state array.
- // I don't think that scrub is a recovery state
- if (is_primary() && is_active() && m_scrubber && m_scrubber->is_scrub_active()) {
- m_scrubber->handle_query_state(f);
- }
}
void PG::init_collection_pool_opts()
}
}
+/**
+ * Hand this PG's history and stats to \a f for in-place modification.
+ *
+ * The callback receives mutable references to info.history and info.stats.
+ * Invoking it is the only thing this function does.
+ */
+void PeeringState::update_stats_wo_resched(
+  std::function<void(pg_history_t &, pg_stat_t &)> f)
+{
+  pg_history_t &history = info.history;
+  pg_stat_t &stats = info.stats;
+  f(history, stats);
+}
+
+
bool PeeringState::append_log_entries_update_missing(
const mempool::osd_pglog::list<pg_log_entry_t> &entries,
ObjectStore::Transaction &t, std::optional<eversion_t> trim_to,
std::function<bool(pg_history_t &, pg_stat_t &)> f,
ObjectStore::Transaction *t = nullptr);
+ void update_stats_wo_resched(
+ std::function<void(pg_history_t &, pg_stat_t &)> f);
+
/**
* adjust_purged_snaps
*
f->close_section();
if (is_primary() && is_active() && m_scrubber) {
- m_scrubber->dump(f.get());
+ m_scrubber->dump_scrubber(f.get(), m_planned_scrub);
}
f->open_object_section("agent_state");
#include "common/Formatter.h"
#include "common/StackStringStream.h"
+#include "include/utime_fmt.h"
#include "OSDMap.h"
#include "osd_types.h"
+#include "osd_types_fmt.h"
#include "os/Transaction.h"
using std::list;
f->dump_bool("pin_stats_invalid", pin_stats_invalid);
f->dump_bool("manifest_stats_invalid", manifest_stats_invalid);
f->dump_unsigned("snaptrimq_len", snaptrimq_len);
- f->dump_float("scrub_duration", scrub_duration);
+ f->dump_int("last_scrub_duration", last_scrub_duration);
+ f->dump_string("scrub_schedule", dump_scrub_schedule());
stats.dump(f);
f->open_array_section("up");
for (auto p = up.cbegin(); p != up.cend(); ++p)
f->dump_int("acting_primary", acting_primary);
}
+/**
+ * A one-line, human-readable rendering of scrub_sched_status.
+ *
+ * An active scrub is reported first (with its duration so far); otherwise
+ * the text is selected by the reported scheduling status.
+ */
+std::string pg_stat_t::dump_scrub_schedule() const
+{
+  const auto& sched = scrub_sched_status;
+
+  // shared by every message that mentions the scrub level
+  const char* const deep_prefix =
+    (sched.m_is_deep == scrub_level_t::deep) ? "deep " : "";
+
+  if (sched.m_is_active) {
+    return fmt::format("{}scrubbing for {}s", deep_prefix,
+                       sched.m_duration_seconds);
+  }
+
+  const auto status = sched.m_sched_status;
+
+  if (status == pg_scrub_sched_status_t::unknown) {
+    // no reported scrub schedule yet
+    return "--"s;
+  }
+  if (status == pg_scrub_sched_status_t::not_queued) {
+    return "no scrub is scheduled"s;
+  }
+  if (status == pg_scrub_sched_status_t::scheduled) {
+    const char* const origin =
+      sched.m_is_periodic ? "periodic" : "user requested";
+    return fmt::format("{} {}scrub scheduled @ {}", origin, deep_prefix,
+                       sched.m_scheduled_at);
+  }
+  if (status == pg_scrub_sched_status_t::queued) {
+    return fmt::format("queued for {}scrub", deep_prefix);
+  }
+
+  // a bug! ('active' without m_is_active, or an unrecognized value)
+  return "SCRUB STATE MISMATCH!"s;
+}
+
+/// Member-wise equality: two status reports are equal only if every field matches.
+bool operator==(const pg_scrubbing_status_t& l, const pg_scrubbing_status_t& r)
+{
+  if (l.m_sched_status != r.m_sched_status) {
+    return false;
+  }
+  if (l.m_is_active != r.m_is_active) {
+    return false;
+  }
+  if (l.m_is_deep != r.m_is_deep) {
+    return false;
+  }
+  if (l.m_is_periodic != r.m_is_periodic) {
+    return false;
+  }
+  if (l.m_duration_seconds != r.m_duration_seconds) {
+    return false;
+  }
+  return l.m_scheduled_at == r.m_scheduled_at;
+}
+
void pg_stat_t::encode(ceph::buffer::list &bl) const
{
ENCODE_START(27, 22, bl);
encode(manifest_stats_invalid, bl);
encode(avail_no_missing, bl);
encode(object_location_counts, bl);
- encode(scrub_duration, bl);
+ encode(last_scrub_duration, bl);
+ encode(scrub_sched_status.m_scheduled_at, bl);
+ encode(scrub_sched_status.m_duration_seconds, bl);
+ encode((__u16)scrub_sched_status.m_sched_status, bl);
+ encode(scrub_sched_status.m_is_active, bl);
+ encode((scrub_sched_status.m_is_deep==scrub_level_t::deep), bl);
+ encode(scrub_sched_status.m_is_periodic, bl);
ENCODE_FINISH(bl);
}
decode(object_location_counts, bl);
}
if (struct_v >= 27) {
- decode(scrub_duration, bl);
+ decode(last_scrub_duration, bl);
+ decode(scrub_sched_status.m_scheduled_at, bl);
+ decode(scrub_sched_status.m_duration_seconds, bl);
+ __u16 scrub_sched_as_u16;
+ decode(scrub_sched_as_u16, bl);
+ scrub_sched_status.m_sched_status = (pg_scrub_sched_status_t)(scrub_sched_as_u16);
+ decode(tmp, bl);
+ scrub_sched_status.m_is_active = tmp;
+ decode(tmp, bl);
+ scrub_sched_status.m_is_deep = tmp ? scrub_level_t::deep : scrub_level_t::shallow;
+ decode(tmp, bl);
+ scrub_sched_status.m_is_periodic = tmp;
}
}
DECODE_FINISH(bl);
a.last_deep_scrub = eversion_t(13, 14);
a.last_deep_scrub_stamp = utime_t(15, 16);
a.last_clean_scrub_stamp = utime_t(17, 18);
- a.scrub_duration = 0.003;
+ a.last_scrub_duration = 3617;
a.snaptrimq_len = 1048576;
list<object_stat_collection_t*> l;
object_stat_collection_t::generate_test_instances(l);
l.manifest_stats_invalid == r.manifest_stats_invalid &&
l.purged_snaps == r.purged_snaps &&
l.snaptrimq_len == r.snaptrimq_len &&
- l.scrub_duration == r.scrub_duration;
+ l.last_scrub_duration == r.last_scrub_duration &&
+ l.scrub_sched_status == r.scrub_sched_status;
}
// -- store_statfs_t --
return l.sum == r.sum;
}
+enum class scrub_level_t : bool { shallow = false, deep = true };
+enum class scrub_type_t : bool { not_repair = false, do_repair = true };
+
+/// is there a scrub in our future? (pg_stat_t encodes this value as a 16-bit integer, so the enumerators' order matters)
+enum class pg_scrub_sched_status_t : uint16_t {
+ unknown, ///< status not reported yet (the default value)
+ not_queued, ///< not in the OSD's scrub queue. Probably not active.
+ active, ///< scrubbing right now
+ scheduled, ///< scheduled for a scrub at an already determined, not yet reached, time
+ queued ///< the scheduled time has passed; waiting for our turn to be scrubbed
+};
+
+struct pg_scrubbing_status_t { // scrub scheduling/progress report of one PG; initialized positionally, so keep the member order
+ utime_t m_scheduled_at{}; // when the next scrub is due (PgScrubber::get_schedule() leaves it default when active or not queued)
+ int32_t m_duration_seconds{0}; // seconds since the scrub began; relevant when scrubbing (0 otherwise)
+ pg_scrub_sched_status_t m_sched_status{pg_scrub_sched_status_t::unknown}; // which of the scheduling states applies
+ bool m_is_active{false}; // true while a scrub is in progress
+ scrub_level_t m_is_deep{scrub_level_t::shallow}; // a scrub level, not a bool: that of the running scrub, else the level expected next
+ bool m_is_periodic{true}; // false for a user-requested scrub; also reported as false while active or not queued
+};
+
+bool operator==(const pg_scrubbing_status_t& l, const pg_scrubbing_status_t& r);
/** pg_stat
* aggregate stats for a single PG.
utime_t last_scrub_stamp;
utime_t last_deep_scrub_stamp;
utime_t last_clean_scrub_stamp;
+ int32_t last_scrub_duration{0};
object_stat_collection_t stats;
// absurd already, so cap it to 2^32 and save 4 bytes at the same time
uint32_t snaptrimq_len;
+ pg_scrubbing_status_t scrub_sched_status;
+
bool stats_invalid:1;
/// true if num_objects_dirty is not accurate (because it was not
/// maintained starting from pool creation)
bool pin_stats_invalid:1;
bool manifest_stats_invalid:1;
- double scrub_duration;
-
pg_stat_t()
: reported_seq(0),
reported_epoch(0),
hitset_stats_invalid(false),
hitset_bytes_stats_invalid(false),
pin_stats_invalid(false),
- manifest_stats_invalid(false),
- scrub_duration(0)
+ manifest_stats_invalid(false)
{ }
epoch_t get_effective_last_epoch_clean() const {
bool is_acting_osd(int32_t osd, bool primary) const;
void dump(ceph::Formatter *f) const;
void dump_brief(ceph::Formatter *f) const;
+ std::string dump_scrub_schedule() const;
void encode(ceph::buffer::list &bl) const;
void decode(ceph::buffer::list::const_iterator &bl);
static void generate_test_instances(std::list<pg_stat_t*>& o);
WRITE_CLASS_ENCODER_FEATURES(PushOp)
std::ostream& operator<<(std::ostream& out, const PushOp &op);
-enum class scrub_level_t : bool { shallow = false, deep = true };
-enum class scrub_type_t : bool { not_repair = false, do_repair = true };
-
/*
* summarize pg contents for purposes of a scrub
*/
// vim: ts=8 sw=2 smarttab
#include "./osd_scrub_sched.h"
-#include "include/utime.h"
+#include "include/utime_fmt.h"
#include "osd/OSD.h"
+#include "osd/osd_types_fmt.h"
#include "pg_scrubber.h"
<< registration_state() << dendl;
}
+/**
+ * Describe, as text, the scheduling state of this scrub job.
+ *
+ * \param now_is the time against which the job's scheduled time is compared
+ * \param is_deep_expected whether the text should name a deep scrub
+ */
+std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is,
+                                                   bool is_deep_expected) const
+{
+  // a job that is not registered with the OSD's scheduling queues is not a
+  // candidate for scrubbing
+  if (state != qu_state_t::registered) {
+    return "no scrub is scheduled";
+  }
+
+  // we are never sure that the next scrub will indeed be shallow, hence
+  // only the 'deep' case is named explicitly
+  const char* const level_prefix = is_deep_expected ? "deep " : "";
+
+  const bool still_in_the_future = !(now_is > schedule.scheduled_at);
+  if (still_in_the_future) {
+    return fmt::format("{}scrub scheduled @ {}", level_prefix,
+                       schedule.scheduled_at);
+  }
+
+  // the time has passed, so we are surely in the queue
+  // (note that for now we do not tell the client if 'penalized')
+  return fmt::format("queued for {}scrub", level_prefix);
+}
+
+
// ////////////////////////////////////////////////////////////////////////// //
// ScrubQueue
: " not-queued";
}
+ /**
+ * a text description of the "scheduling intentions" of this PG:
+ * are we already scheduled for a scrub/deep scrub? when?
+ */
+ std::string scheduling_state(utime_t now_is, bool is_deep_expected) const;
+
friend std::ostream& operator<<(std::ostream& out, const ScrubJob& pg);
};
dout(20) << __func__ << " pg(" << m_pg_id << ") planned:" << req_flags << dendl;
update_scrub_job(req_flags);
+ m_pg->publish_stats_to_osd();
}
m_start = m_pg->info.pgid.pgid.get_hobj_start();
m_active = true;
+ m_pg->publish_stats_to_osd();
}
void PgScrubber::on_replica_init()
* is cleared once scrubbing starts; Some of the values dumped here are
* thus transitory.
*/
-void PgScrubber::dump(ceph::Formatter* f) const
+void PgScrubber::dump_scrubber(ceph::Formatter* f,
+                               const requested_scrub_t& request_flags) const
{
f->open_object_section("scrubber");
+
+  // Two mutually exclusive reports are produced inside the "scrubber"
+  // section: the details of the scrub in progress, or - when idle - the
+  // planned-scrub flags passed in as 'request_flags' and the next schedule.
+  if (m_active) {  // TBD replace with PR#42780's test
+    f->dump_bool("active", true);
+    dump_active_scrubber(f, state_test(PG_STATE_DEEP_SCRUB));
+  } else {
+    f->dump_bool("active", false);
+    // every planned-scrub flag is read from the caller-supplied
+    // 'request_flags', so that the whole report reflects a single source
+    f->dump_bool("must_scrub",
+                 (request_flags.must_scrub || m_flags.required));
+    f->dump_bool("must_deep_scrub", request_flags.must_deep_scrub);
+    f->dump_bool("must_repair", request_flags.must_repair);
+    f->dump_bool("need_auto", request_flags.need_auto);
+
+    f->dump_stream("scrub_reg_stamp") << m_scrub_job->get_sched_time();
+
+    // note that we are repeating logic that is coded elsewhere (currently
+    // PG.cc). This is not optimal.
+    // The clock is sampled once, so that the deep/shallow decision and the
+    // scheduling text are based on the same instant.
+    const auto now_is = ceph_clock_now();
+    const bool deep_expected = (now_is >= m_pg->next_deepscrub_interval()) ||
+                               request_flags.must_deep_scrub ||
+                               request_flags.need_auto;
+    const auto sched_state =
+      m_scrub_job->scheduling_state(now_is, deep_expected);
+    f->dump_string("schedule", sched_state);
+  }
+
+  f->close_section();
+}
+
+void PgScrubber::dump_active_scrubber(ceph::Formatter* f, bool is_deep) const // emits the details of a scrub in progress; opens no section of its own, apart from the nested "waiting_on_whom" array
+{
f->dump_stream("epoch_start") << m_interval_start;
- f->dump_bool("active", m_active);
- if (m_active) {
- f->dump_stream("start") << m_start;
- f->dump_stream("end") << m_end;
- f->dump_stream("m_max_end") << m_max_end;
- f->dump_stream("subset_last_update") << m_subset_last_update;
- f->dump_bool("deep", m_is_deep);
- f->dump_bool("must_scrub", (m_pg->m_planned_scrub.must_scrub || m_flags.required));
- f->dump_bool("must_deep_scrub", m_pg->m_planned_scrub.must_deep_scrub);
- f->dump_bool("must_repair", m_pg->m_planned_scrub.must_repair);
- f->dump_bool("need_auto", m_pg->m_planned_scrub.need_auto);
- f->dump_bool("req_scrub", m_flags.required);
- f->dump_bool("time_for_deep", m_pg->m_planned_scrub.time_for_deep);
- f->dump_bool("auto_repair", m_flags.auto_repair);
- f->dump_bool("check_repair", m_flags.check_repair);
- f->dump_bool("deep_scrub_on_error", m_flags.deep_scrub_on_error);
- f->dump_stream("scrub_reg_stamp") << m_scrub_job->get_sched_time(); // utime_t
- f->dump_unsigned("priority", m_flags.priority);
- f->dump_int("shallow_errors", m_shallow_errors);
- f->dump_int("deep_errors", m_deep_errors);
- f->dump_int("fixed", m_fixed_count);
- {
- f->open_array_section("waiting_on_whom");
- for (const auto& p : m_maps_status.get_awaited()) {
- f->dump_stream("shard") << p;
- }
- f->close_section();
+ f->dump_stream("start") << m_start;
+ f->dump_stream("end") << m_end;
+ f->dump_stream("max_end") << m_max_end; // key was "m_max_end" in the replaced code
+ f->dump_stream("subset_last_update") << m_subset_last_update;
+ // note that m_is_deep will be set some time after PG_STATE_DEEP_SCRUB is
+ // asserted. Thus, using the latter.
+ f->dump_bool("deep", is_deep);
+
+ // dump the scrub-type flags
+ f->dump_bool("req_scrub", m_flags.required);
+ f->dump_bool("auto_repair", m_flags.auto_repair);
+ f->dump_bool("check_repair", m_flags.check_repair);
+ f->dump_bool("deep_scrub_on_error", m_flags.deep_scrub_on_error);
+ f->dump_unsigned("priority", m_flags.priority);
+
+ f->dump_int("shallow_errors", m_shallow_errors);
+ f->dump_int("deep_errors", m_deep_errors);
+ f->dump_int("fixed", m_fixed_count);
+ {
+ f->open_array_section("waiting_on_whom"); // one "shard" entry per element returned by m_maps_status.get_awaited()
+ for (const auto& p : m_maps_status.get_awaited()) {
+ f->dump_stream("shard") << p;
}
+ f->close_section();
}
- f->close_section();
+ f->dump_string("schedule", "scrubbing"); // a fixed text: for an active scrub no scheduling details are reported here
}
+/**
+ * Build a report of this PG's scrub status: a scrub in progress (with its
+ * duration so far), not queued, queued (scheduled time already passed), or
+ * scheduled for a future time.
+ *
+ * \return a default-constructed ('unknown') status if there is no scrub job
+ */
+pg_scrubbing_status_t PgScrubber::get_schedule() const
+{
+  dout(25) << __func__ << dendl;
+
+  if (!m_scrub_job) {
+    return pg_scrubbing_status_t{};
+  }
+
+  const utime_t now_is = ceph_clock_now();
+
+  if (m_active) {
+    // report current scrub info, including updated duration
+    const int32_t elapsed_sec = (now_is - scrub_begin_stamp).sec();
+
+    // NOTE(review): dump_active_scrubber() carries a comment saying that
+    // m_is_deep is set some time after PG_STATE_DEEP_SCRUB is asserted -
+    // confirm that m_is_deep is reliable at this point.
+    const scrub_level_t current_level =
+      m_is_deep ? scrub_level_t::deep : scrub_level_t::shallow;
+
+    return pg_scrubbing_status_t{
+      utime_t{},
+      elapsed_sec,
+      pg_scrub_sched_status_t::active,
+      true,  // active
+      current_level,
+      false /* is periodic? unknown, actually */};
+  }
+
+  if (m_scrub_job->state != ScrubQueue::qu_state_t::registered) {
+    return pg_scrubbing_status_t{utime_t{},
+                                 0,
+                                 pg_scrub_sched_status_t::not_queued,
+                                 false,
+                                 scrub_level_t::shallow,
+                                 false};
+  }
+
+  const auto& planned = m_pg->m_planned_scrub;
+
+  // Will next scrub surely be a deep one? note that deep-scrub might be
+  // selected even if we report a regular scrub here.
+  const bool deep_expected = (now_is >= m_pg->next_deepscrub_interval()) ||
+                             planned.must_deep_scrub || planned.need_auto;
+  const scrub_level_t expected_level =
+    deep_expected ? scrub_level_t::deep : scrub_level_t::shallow;
+
+  // 'periodic' means: none of the explicit-request flags is set
+  const bool periodic =
+    !planned.must_scrub && !planned.need_auto && !planned.must_deep_scrub;
+
+  // are we ripe for scrubbing? if so, we are waiting for our turn at the
+  // OSD ('queued'); otherwise the scheduled time is still ahead of us.
+  const bool is_due = now_is > m_scrub_job->schedule.scheduled_at;
+  const pg_scrub_sched_status_t sched_status =
+    is_due ? pg_scrub_sched_status_t::queued
+           : pg_scrub_sched_status_t::scheduled;
+
+  return pg_scrubbing_status_t{m_scrub_job->schedule.scheduled_at,
+                               0,
+                               sched_status,
+                               false,
+                               expected_level,
+                               periodic};
+}
void PgScrubber::handle_query_state(ceph::Formatter* f)
{
f->dump_bool("scrubber.active", m_active);
f->dump_stream("scrubber.start") << m_start;
f->dump_stream("scrubber.end") << m_end;
- f->dump_stream("scrubber.m_max_end") << m_max_end;
- f->dump_stream("scrubber.m_subset_last_update") << m_subset_last_update;
+ f->dump_stream("scrubber.max_end") << m_max_end;
+ f->dump_stream("scrubber.subset_last_update") << m_subset_last_update;
f->dump_bool("scrubber.deep", m_is_deep);
{
f->open_array_section("scrubber.waiting_on_whom");
scrub_begin_stamp = ceph_clock_now();
}
-void PgScrubber::set_scrub_duration() {
- utime_t stamp = ceph_clock_now();
- utime_t duration = stamp - scrub_begin_stamp;
- m_pg->recovery_state.update_stats(
- [=](auto &history, auto &stats) {
- stats.scrub_duration = double(duration);
- return true;
- });
+/// Record, in the PG's stats, how long the scrub took: the time elapsed
+/// since scrub_begin_stamp, in whole seconds (rounded up).
+void PgScrubber::set_scrub_duration()
+{
+  const utime_t elapsed = ceph_clock_now() - scrub_begin_stamp;
+
+  m_pg->recovery_state.update_stats([elapsed](auto& history, auto& stats) {
+    // milliseconds -> seconds, rounding any fraction up
+    stats.last_scrub_duration = ceill(elapsed.to_msec() / 1000.0);
+    return true;
+  });
}
void PgScrubber::reserve_replicas()
state_clear(PG_STATE_DEEP_SCRUB);
reset_internal_state();
-
- m_pg->publish_stats_to_osd();
}
/*
void handle_query_state(ceph::Formatter* f) final;
- void dump(ceph::Formatter* f) const override;
+ pg_scrubbing_status_t get_schedule() const final;
+
+ void dump_scrubber(ceph::Formatter* f,
+ const requested_scrub_t& request_flags) const final;
// used if we are a replica
void run_callbacks();
+ // 'query' command data for an active scrub
+ void dump_active_scrubber(ceph::Formatter* f, bool is_deep) const;
+
// ----- methods used to verify the relevance of incoming events:
/**
virtual void handle_query_state(ceph::Formatter* f) = 0;
- virtual void dump(ceph::Formatter* f) const = 0;
+ virtual pg_scrubbing_status_t get_schedule() const = 0;
+
+ virtual void dump_scrubber(ceph::Formatter* f,
+ const requested_scrub_t& request_flags) const = 0;
/**
* Return true if soid is currently being scrubbed and pending IOs should block.