Add a new column, SCRUB_DURATION, to the PG stats; it records the time taken by a PG scrub.
Fixes: https://tracker.ceph.com/issues/52605
Signed-off-by: Aishwarya Mathuria <amathuri@redhat.com>
teardown $dir || return 1
}
+# Verify that a completed scrub publishes a positive 'scrub_duration' in the
+# PG stats reported by 'ceph pg <pgid> query'.
+#
+# $1 - the test directory used for the cluster (setup / teardown)
+function TEST_pg_dump_scrub_duration() {
+    local dir=$1
+    local poolname=test
+    local OSDS=3
+    local objects=15
+
+    TESTDATA="testdata.$$"
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+    run_mgr $dir x || return 1
+    for osd in $(seq 0 $(expr $OSDS - 1))
+    do
+        run_osd $dir $osd || return 1
+    done
+
+    # Create a pool with a single pg
+    create_pool $poolname 1 1 || return 1
+    wait_for_clean || return 1
+    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+    # Populate the pool so that the scrub has some objects to go over
+    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 || return 1
+    for i in `seq 1 $objects`
+    do
+        rados -p $poolname put obj${i} $TESTDATA || return 1
+    done
+    rm -f $TESTDATA
+
+    local pgid="${poolid}.0"
+    pg_scrub $pgid || return 1
+
+    local duration
+    duration=$(ceph pg $pgid query | jq '.info.stats.scrub_duration')
+    echo "scrub_duration: $duration"
+    # Compare numerically inside jq: `test a '>' b` compares strings, so a
+    # missing field (printed by jq as "null") would sort after "0" and pass.
+    echo "$duration" | jq -e 'type == "number" and . > 0' > /dev/null || return 1
+
+    teardown $dir || return 1
+}
+
main osd-scrub-test "$@"
# Local Variables:
tab.define_column("LAST_DEEP_SCRUB", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SNAPTRIMQ_LEN", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
}
for (auto i = pg_stats.begin();
<< st.last_deep_scrub
<< st.last_deep_scrub_stamp
<< st.snaptrimq_len
+ << st.scrub_duration
<< TextTable::endrow;
}
}
tab.define_column("ACTING", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
for (auto i = pgs.begin(); i != pgs.end(); ++i) {
const pg_stat_t& st = pg_stat.at(*i);
<< actingstr.str()
<< st.last_scrub_stamp
<< st.last_deep_scrub_stamp
+ << st.scrub_duration
<< TextTable::endrow;
}
f->dump_bool("pin_stats_invalid", pin_stats_invalid);
f->dump_bool("manifest_stats_invalid", manifest_stats_invalid);
f->dump_unsigned("snaptrimq_len", snaptrimq_len);
+ f->dump_float("scrub_duration", scrub_duration);
stats.dump(f);
f->open_array_section("up");
for (auto p = up.cbegin(); p != up.cend(); ++p)
void pg_stat_t::encode(ceph::buffer::list &bl) const
{
- ENCODE_START(26, 22, bl);
+ ENCODE_START(27, 22, bl);
encode(version, bl);
encode(reported_seq, bl);
encode(reported_epoch, bl);
encode(manifest_stats_invalid, bl);
encode(avail_no_missing, bl);
encode(object_location_counts, bl);
+ encode(scrub_duration, bl);
ENCODE_FINISH(bl);
}
decode(avail_no_missing, bl);
decode(object_location_counts, bl);
}
+ if (struct_v >= 27) {
+ decode(scrub_duration, bl);
+ }
}
DECODE_FINISH(bl);
}
a.last_deep_scrub = eversion_t(13, 14);
a.last_deep_scrub_stamp = utime_t(15, 16);
a.last_clean_scrub_stamp = utime_t(17, 18);
+ a.scrub_duration = 0.003;
a.snaptrimq_len = 1048576;
list<object_stat_collection_t*> l;
object_stat_collection_t::generate_test_instances(l);
l.pin_stats_invalid == r.pin_stats_invalid &&
l.manifest_stats_invalid == r.manifest_stats_invalid &&
l.purged_snaps == r.purged_snaps &&
- l.snaptrimq_len == r.snaptrimq_len;
+ l.snaptrimq_len == r.snaptrimq_len &&
+ l.scrub_duration == r.scrub_duration;
}
// -- store_statfs_t --
bool pin_stats_invalid:1;
bool manifest_stats_invalid:1;
+ double scrub_duration;
+
pg_stat_t()
: reported_seq(0),
reported_epoch(0),
hitset_stats_invalid(false),
hitset_bytes_stats_invalid(false),
pin_stats_invalid(false),
- manifest_stats_invalid(false)
+ manifest_stats_invalid(false),
+ scrub_duration(0)
{ }
epoch_t get_effective_last_epoch_clean() const {
m_osds->get_nodeid());
}
+// Remember the current time as the moment this scrub started. The stored
+// stamp is the reference point used by set_scrub_duration().
+void PgScrubber::set_scrub_begin_time()
+{
+  scrub_begin_stamp = ceph_clock_now();
+}
+
+// Compute the time elapsed since set_scrub_begin_time() was called and
+// publish it as the PG's 'scrub_duration' stat (in seconds, as a double).
+//
+// Nothing is published when no usable start time is available: an unset
+// (default-constructed) or future 'scrub_begin_stamp' would otherwise turn
+// into a meaningless, huge duration.
+// NOTE(review): a stamp left over from an earlier scrub is not detected
+// here - confirm every path that starts a scrub calls set_scrub_begin_time().
+void PgScrubber::set_scrub_duration()
+{
+  utime_t stamp = ceph_clock_now();
+  if (scrub_begin_stamp == utime_t{} || stamp < scrub_begin_stamp) {
+    dout(10) << __func__
+             << ": no valid scrub start time; scrub_duration not updated"
+             << dendl;
+    return;
+  }
+
+  utime_t duration = stamp - scrub_begin_stamp;
+  m_pg->recovery_state.update_stats(
+    // only the computed duration is needed inside the callback
+    [duration](auto &history, auto &stats) {
+      stats.scrub_duration = double(duration);
+      return true;
+    });
+}
+
void PgScrubber::reserve_replicas()
{
dout(10) << __func__ << dendl;
std::string dump_awaited_maps() const final;
+ /// store the current time in 'scrub_begin_stamp'
+ void set_scrub_begin_time() final;
+
+ /// update the PG's 'scrub_duration' stat with the time elapsed since
+ /// 'scrub_begin_stamp'
+ void set_scrub_duration() final;
+
+ /// the time at which set_scrub_begin_time() was last called
+ utime_t scrub_begin_stamp;
+
protected:
bool state_test(uint64_t m) const { return m_pg->state_test(m); }
void state_set(uint64_t m) { m_pg->state_set(m); }
dout(10) << "-- state -->> NotActive" << dendl;
}
+// StartScrub received while in NotActive: ask the scrubber to note the time
+// at which the scrub begins, then transition to ReservingReplicas.
+sc::result NotActive::react(const StartScrub&)
+{
+ dout(10) << "NotActive::react(const StartScrub&)" << dendl;
+ DECLARE_LOCALS; // provides the 'scrbr' alias used below
+ scrbr->set_scrub_begin_time();
+ return transit<ReservingReplicas>();
+}
+
// ----------------------- ReservingReplicas ---------------------------------
ReservingReplicas::ReservingReplicas(my_context ctx) : my_base(ctx)
return discard_event();
}
+// ScrubFinished received while in WaitDigestUpdate: have the scrubber record
+// the scrub duration, then transition back to NotActive.
+sc::result WaitDigestUpdate::react(const ScrubFinished&)
+{
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ dout(10) << "WaitDigestUpdate::react(const ScrubFinished&)" << dendl;
+ scrbr->set_scrub_duration();
+ return transit<NotActive>();
+}
+
ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
: m_pg{pg}, m_pg_id{pg->pg_id}, m_scrbr{pg_scrub}
{
struct NotActive : sc::state<NotActive, ScrubMachine> {
explicit NotActive(my_context ctx);
- using reactions = mpl::list<sc::transition<StartScrub, ReservingReplicas>,
+ // StartScrub is handled by a custom reaction (react(const StartScrub&)
+ // below) rather than by a plain transition.
+ // NOTE(review): AfterRepairScrub still uses a plain transition to
+ // ReservingReplicas, so nothing scrub-timing related runs on that path -
+ // confirm this is intended.
+ using reactions = mpl::list<sc::custom_reaction<StartScrub>,
// a scrubbing that was initiated at recovery completion,
// and requires no resource reservations:
sc::transition<AfterRepairScrub, ReservingReplicas>,
sc::transition<StartReplica, ReplicaWaitUpdates>,
sc::transition<StartReplicaNoWait, ActiveReplica>>;
+ // handler for the StartScrub event
+ sc::result react(const StartScrub&);
};
struct ReservingReplicas : sc::state<ReservingReplicas, ScrubMachine> {
explicit WaitDigestUpdate(my_context ctx);
using reactions = mpl::list<sc::custom_reaction<DigestUpdate>,
- sc::transition<NextChunk, PendingTimer>,
- sc::transition<ScrubFinished, NotActive>>;
+ sc::custom_reaction<ScrubFinished>,
+ sc::transition<NextChunk, PendingTimer>>;
sc::result react(const DigestUpdate&);
+ sc::result react(const ScrubFinished&);
};
// ----------------------------- the "replica active" states -----------------------
virtual void unreserve_replicas() = 0;
+ /// called by the state machine when StartScrub is handled in NotActive,
+ /// i.e. as a scrub begins
+ virtual void set_scrub_begin_time() = 0;
+
+ /// called by the state machine when ScrubFinished is handled in
+ /// WaitDigestUpdate, i.e. as a scrub completes
+ virtual void set_scrub_duration() = 0;
+
/**
* No new scrub session will start while a scrub was initiate on a PG,
* and that PG is trying to acquire replica resources.