From a86ead953dc5fa2c78a4fe86700b0c1aba2727af Mon Sep 17 00:00:00 2001 From: Sridhar Seshasayee Date: Tue, 15 Mar 2022 01:38:57 +0530 Subject: [PATCH] osd: Add snaptrim duration to pg dump stats. Add the snaptrim duration to the json formatted output of the pg dump stats. Define methods for a PG to set the snaptrim begin time and then to calculate the total time spent to trim all the objects for the snaps in the snap_trimq for the PG. Tests: - Librados C and C++ API tests to verify the time spent for a snaptrim operation on a PG. These tests use the self-managed snaps APIs. - Standalone tests to verify snaptrim duration using rados pool snaps. Signed-off-by: Sridhar Seshasayee --- qa/standalone/misc/test-snaptrim-stats.sh | 82 ++++++++++++++++++++++- src/osd/PG.h | 15 +++++ src/osd/PeeringState.cc | 1 + src/osd/PrimaryLogPG.cc | 2 + src/osd/osd_types.cc | 7 +- src/osd/osd_types.h | 2 + src/test/librados/snapshots_stats.cc | 24 ++++--- src/test/librados/snapshots_stats_cxx.cc | 24 ++++--- 8 files changed, 137 insertions(+), 20 deletions(-) diff --git a/qa/standalone/misc/test-snaptrim-stats.sh b/qa/standalone/misc/test-snaptrim-stats.sh index 71a6fced040..98b3e4fdd9c 100755 --- a/qa/standalone/misc/test-snaptrim-stats.sh +++ b/qa/standalone/misc/test-snaptrim-stats.sh @@ -88,18 +88,98 @@ function TEST_snaptrim_stats() { wait_for_clean || return 1 sleep $WAIT_FOR_UPDATE local objects_trimmed=0 + local snaptrim_duration_total=0.0 for i in $(seq 0 $(expr $PGNUM - 1)) do local pgid="${poolid}.${i}" objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \ jq '.info.stats.objects_trimmed')) + snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \ + $pgid query | jq '.info.stats.snaptrim_duration') | bc` done - test $objects_trimmed -eq $objects || return 1 + echo "$snaptrim_duration_total > 0.0" | bc || return 1 teardown $dir || return 1 } +function TEST_snaptrim_stats_multiple_snaps() { + local dir=$1 + local poolname=test + local OSDS=3 + local PGNUM=8 + local PGPNUM=8 + local objects=10 + local WAIT_FOR_UPDATE=10 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1 + done + + # disable scrubs + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool + create_pool $poolname $PGNUM $PGPNUM + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + # write a few objects + local TESTDATA="testdata.0" + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + # create snapshots, clones + NUMSNAPS=2 + for i in `seq 1 $NUMSNAPS` + do + rados -p $poolname mksnap snap${i} + TESTDATA="testdata".${i} + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + done + + # remove the snapshots, should trigger snaptrim + local total_objects_trimmed=0 + for i in `seq 1 $NUMSNAPS` + do + rados -p $poolname rmsnap snap${i} + + # check for snaptrim stats + wait_for_clean || return 1 + sleep $WAIT_FOR_UPDATE + local objects_trimmed=0 + local snaptrim_duration_total=0.0 + for i in $(seq 0 $(expr $PGNUM - 1)) + do + local pgid="${poolid}.${i}" + objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \ + jq '.info.stats.objects_trimmed')) + snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \ + $pgid query | jq '.info.stats.snaptrim_duration') | bc` + done + test $objects_trimmed -eq $objects || return 1 + echo "$snaptrim_duration_total > 0.0" | bc || return 1 + total_objects_trimmed=$(expr $total_objects_trimmed + $objects_trimmed) + done + + test $total_objects_trimmed -eq $((objects * NUMSNAPS)) || return 1 + + teardown $dir || return 1 +} main test-snaptrim-stats "$@" # Local Variables: diff --git a/src/osd/PG.h b/src/osd/PG.h index 8ac58659326..753d26dd274 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -577,6 +577,21 @@ public: }); } + utime_t snaptrim_begin_stamp; + + void set_snaptrim_begin_stamp() { + snaptrim_begin_stamp = ceph_clock_now(); + } + + void set_snaptrim_duration() { + utime_t cur_stamp = ceph_clock_now(); + utime_t duration = cur_stamp - snaptrim_begin_stamp; + recovery_state.update_stats_wo_resched( + [=](auto &history, auto &stats) { + stats.snaptrim_duration = double(duration); + }); + } + unsigned get_target_pg_log_entries() const override; void clear_publish_stats() override; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 2101449867c..0158ed92da6 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -3239,6 +3239,7 @@ void PeeringState::split_into( info.stats.stats_invalid = true; child->info.stats.stats_invalid = true; child->info.stats.objects_trimmed = 0; + child->info.stats.snaptrim_duration = 0.0; child->info.last_epoch_started = info.last_epoch_started; child->info.last_interval_started = info.last_interval_started; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 6b32ab42a54..ef6a980fd01 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -4904,6 +4904,7 @@ void PrimaryLogPG::kick_snap_trim() } else { dout(10) << __func__ << ": clean and snaps to trim, kicking" << dendl; reset_objects_trimmed(); + set_snaptrim_begin_stamp(); snap_trimmer_machine.process_event(KickTrim()); } } @@ -15649,6 +15650,7 @@ boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&) pg->recovery_state.share_pg_info(); } post_event(KickTrim()); + pg->set_snaptrim_duration(); return transit< NotTrimming >(); } ceph_assert(!to_trim.empty()); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 7291ccab5a4..6d9cb19d832 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2875,6 +2875,7 @@ void pg_stat_t::dump(Formatter *f) const f->dump_string("scrub_schedule", dump_scrub_schedule()); f->dump_float("scrub_duration", scrub_duration); f->dump_int("objects_trimmed", objects_trimmed); + f->dump_float("snaptrim_duration", snaptrim_duration); stats.dump(f); f->open_array_section("up"); for (auto p = up.cbegin(); p != up.cend(); ++p) @@ -3028,6 +3029,7 @@ void pg_stat_t::encode(ceph::buffer::list &bl) const encode(objects_scrubbed, bl); encode(scrub_duration, bl); encode(objects_trimmed, bl); + encode(snaptrim_duration, bl); ENCODE_FINISH(bl); } @@ -3121,6 +3123,7 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl) } if (struct_v >= 28) { decode(objects_trimmed, bl); + decode(snaptrim_duration, bl); } } DECODE_FINISH(bl); @@ -3159,6 +3162,7 @@ void pg_stat_t::generate_test_instances(list& o) a.snaptrimq_len = 1048576; a.objects_scrubbed = 0; a.objects_trimmed = 0; + a.snaptrim_duration = 0.123; list l; object_stat_collection_t::generate_test_instances(l); a.stats = *l.back(); @@ -3236,7 +3240,8 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r) l.scrub_sched_status == r.scrub_sched_status && l.objects_scrubbed == r.objects_scrubbed && l.scrub_duration == r.scrub_duration && - l.objects_trimmed == r.objects_trimmed; + l.objects_trimmed == r.objects_trimmed && + l.snaptrim_duration == r.snaptrim_duration; } // -- store_statfs_t -- diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index f29c40ae66f..54c47be3d2f 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2274,6 +2274,7 @@ struct pg_stat_t { // absurd already, so cap it to 2^32 and save 4 bytes at the same time uint32_t snaptrimq_len; int64_t objects_trimmed; + double snaptrim_duration; pg_scrubbing_status_t scrub_sched_status; @@ -2301,6 +2302,7 @@ struct pg_stat_t { acting_primary(-1), snaptrimq_len(0), objects_trimmed(0), + snaptrim_duration(0.0), stats_invalid(false), dirty_stats_invalid(false), omap_stats_invalid(false), diff --git a/src/test/librados/snapshots_stats.cc b/src/test/librados/snapshots_stats.cc index 6a0fcaec48a..143299ab1a9 100644 --- a/src/test/librados/snapshots_stats.cc +++ b/src/test/librados/snapshots_stats.cc @@ -127,9 +127,9 @@ protected: } }; -int get_objects_trimmed(json_spirit::Object& pg_dump) { - int objs_trimmed = 0; - +void get_snaptrim_stats(json_spirit::Object& pg_dump, + int *objs_trimmed, + double *trim_duration) { // pg_map json_spirit::Object pgmap; for (json_spirit::Object::size_type i = 0; i < pg_dump.size(); ++i) { @@ -156,13 +156,13 @@ int get_objects_trimmed(json_spirit::Object& pg_dump) { for(json_spirit::Object::size_type k = 0; k < pg_stat.size(); ++k) { json_spirit::Pair& stats = pg_stat[k]; if (stats.name_ == "objects_trimmed") { - objs_trimmed += stats.value_.get_int(); - break; + *objs_trimmed += stats.value_.get_int(); + } + if (stats.name_ == "snaptrim_duration") { + *trim_duration += stats.value_.get_real(); } } } - - return objs_trimmed; } const int bufsize = 128; @@ -210,6 +210,7 @@ TEST_F(LibRadosSnapshotStatsSelfManaged, SnaptrimStats) { // Dump pg stats and determine if snaptrim stats are getting set int objects_trimmed = 0; + double snaptrim_duration = 0.0; int tries = 0; do { char *buf, *st; @@ -225,7 +226,7 @@ TEST_F(LibRadosSnapshotStatsSelfManaged, SnaptrimStats) { // pg dump object json_spirit::Object& obj = v.get_obj(); - objects_trimmed = get_objects_trimmed(obj); + get_snaptrim_stats(obj, &objects_trimmed, &snaptrim_duration); if (objects_trimmed < num_objs) { tries++; objects_trimmed = 0; @@ -236,6 +237,8 @@ TEST_F(LibRadosSnapshotStatsSelfManaged, SnaptrimStats) { // final check for objects trimmed ASSERT_EQ(objects_trimmed, num_objs); + std::cout << "Snaptrim duration: " << snaptrim_duration << std::endl; + ASSERT_GT(snaptrim_duration, 0.0); // clean-up remaining objects for (int i = 0; i < num_objs; ++i) { @@ -289,6 +292,7 @@ TEST_F(LibRadosSnapshotStatsSelfManagedEC, SnaptrimStats) { // Dump pg stats and determine if snaptrim stats are getting set int objects_trimmed = 0; + double snaptrim_duration = 0.0; int tries = 0; do { char *buf, *st; @@ -304,7 +308,7 @@ TEST_F(LibRadosSnapshotStatsSelfManagedEC, SnaptrimStats) { // pg dump object json_spirit::Object& obj = v.get_obj(); - objects_trimmed = get_objects_trimmed(obj); + get_snaptrim_stats(obj, &objects_trimmed, &snaptrim_duration); if (objects_trimmed != num_objs) { tries++; objects_trimmed = 0; @@ -315,6 +319,8 @@ TEST_F(LibRadosSnapshotStatsSelfManagedEC, SnaptrimStats) { // final check for objects trimmed ASSERT_EQ(objects_trimmed, num_objs); + std::cout << "Snaptrim duration: " << snaptrim_duration << std::endl; + ASSERT_GT(snaptrim_duration, 0.0); // clean-up remaining objects for (int i = 0; i < num_objs; ++i) { diff --git a/src/test/librados/snapshots_stats_cxx.cc b/src/test/librados/snapshots_stats_cxx.cc index d7106f6b3af..f6be3a915df 100644 --- a/src/test/librados/snapshots_stats_cxx.cc +++ b/src/test/librados/snapshots_stats_cxx.cc @@ -119,9 +119,9 @@ protected: } }; -int get_objects_trimmed(json_spirit::Object& pg_dump) { - int objs_trimmed = 0; - +void get_snaptrim_stats(json_spirit::Object& pg_dump, + int *objs_trimmed, + double *trim_duration) { // pg_map json_spirit::Object pgmap; for (json_spirit::Object::size_type i = 0; i < pg_dump.size(); ++i) { @@ -148,13 +148,13 @@ int get_objects_trimmed(json_spirit::Object& pg_dump) { for(json_spirit::Object::size_type k = 0; k < pg_stat.size(); ++k) { json_spirit::Pair& stats = pg_stat[k]; if (stats.name_ == "objects_trimmed") { - objs_trimmed += stats.value_.get_int(); - break; + *objs_trimmed += stats.value_.get_int(); + } + if (stats.name_ == "snaptrim_duration") { + *trim_duration += stats.value_.get_real(); } } } - - return objs_trimmed; } const int bufsize = 128; @@ -203,6 +203,7 @@ TEST_F(LibRadosSnapshotStatsSelfManagedPP, SnaptrimStatsPP) { // Dump pg stats and determine if snaptrim stats are getting set int objects_trimmed = 0; + double snaptrim_duration = 0.0; int tries = 0; do { string cmd = string("{\"prefix\": \"pg dump\",\"format\":\"json\"}"); @@ -215,7 +216,7 @@ TEST_F(LibRadosSnapshotStatsSelfManagedPP, SnaptrimStatsPP) { // pg_map json_spirit::Object& obj = v.get_obj(); - objects_trimmed = get_objects_trimmed(obj); + get_snaptrim_stats(obj, &objects_trimmed, &snaptrim_duration); if (objects_trimmed < num_objs) { tries++; objects_trimmed = 0; @@ -226,6 +227,8 @@ TEST_F(LibRadosSnapshotStatsSelfManagedPP, SnaptrimStatsPP) { // final check for objects trimmed ASSERT_EQ(objects_trimmed, num_objs); + std::cout << "Snaptrim duration: " << snaptrim_duration << std::endl; + ASSERT_GT(snaptrim_duration, 0.0); // clean-up remaining objects ioctx.snap_set_read(librados::SNAP_HEAD); @@ -282,6 +285,7 @@ TEST_F(LibRadosSnapshotStatsSelfManagedECPP, SnaptrimStatsECPP) { // Dump pg stats and determine if snaptrim stats are getting set int objects_trimmed = 0; + double snaptrim_duration = 0.0; int tries = 0; do { string cmd = string("{\"prefix\": \"pg dump\",\"format\":\"json\"}"); @@ -294,7 +298,7 @@ TEST_F(LibRadosSnapshotStatsSelfManagedECPP, SnaptrimStatsECPP) { // pg_map json_spirit::Object& obj = v.get_obj(); - objects_trimmed = get_objects_trimmed(obj); + get_snaptrim_stats(obj, &objects_trimmed, &snaptrim_duration); if (objects_trimmed < num_objs) { tries++; objects_trimmed = 0; @@ -305,6 +309,8 @@ TEST_F(LibRadosSnapshotStatsSelfManagedECPP, SnaptrimStatsECPP) { // final check for objects trimmed ASSERT_EQ(objects_trimmed, num_objs); + std::cout << "Snaptrim duration: " << snaptrim_duration << std::endl; + ASSERT_GT(snaptrim_duration, 0.0); // clean-up remaining objects ioctx.snap_set_read(LIBRADOS_SNAP_HEAD); -- 2.39.5