From 57abdb11fae9f94417e1ee6c9427a33f729a9703 Mon Sep 17 00:00:00 2001
From: David Zafman
Date: Tue, 19 Mar 2019 13:55:36 -0700
Subject: [PATCH] osd, test: Add num_shards_repaired to osd_stat_t for pushes
 with repair set

Fixes: http://tracker.ceph.com/issues/38616

Signed-off-by: David Zafman
---
 qa/standalone/scrub/osd-scrub-repair.sh | 101 +++++++++++++++++++++++-
 src/messages/MOSDPGPush.h               |   9 ++-
 src/osd/ECBackend.cc                    |  11 ++-
 src/osd/ECBackend.h                     |   3 +-
 src/osd/OSD.cc                          |   7 ++
 src/osd/OSD.h                           |   1 +
 src/osd/PGBackend.h                     |   2 +
 src/osd/PrimaryLogPG.h                  |   6 ++
 src/osd/ReplicatedBackend.cc            |  16 +++-
 src/osd/ReplicatedBackend.h             |   2 +-
 src/osd/osd_types.cc                    |  12 ++-
 src/osd/osd_types.h                     |   7 +-
 12 files changed, 163 insertions(+), 14 deletions(-)

diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh
index 2af971f157651..b62e2c086c813 100755
--- a/qa/standalone/scrub/osd-scrub-repair.sh
+++ b/qa/standalone/scrub/osd-scrub-repair.sh
@@ -498,7 +498,9 @@ function TEST_auto_repair_bluestore_failed_norecov() {
 function TEST_repair_stats() {
     local dir=$1
     local poolname=testpool
+    local OSDS=2
     local OBJS=30
+    # This needs to be an even number
     local REPAIRS=20
 
     # Launch a cluster with 5 seconds scrub interval
@@ -507,7 +509,7 @@ function TEST_repair_stats() {
     run_mgr $dir x || return 1
     local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
                         --osd-scrub-interval-randomize-ratio=0"
-    for id in $(seq 0 2) ; do
+    for id in $(seq 0 $(expr $OSDS - 1)) ; do
         run_osd_bluestore $dir $id $ceph_osd_args || return 1
     done
 
@@ -530,20 +532,115 @@ function TEST_repair_stats() {
     local primary=$(get_primary $poolname obj1)
 
     kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+    kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
+    for i in $(seq 1 $REPAIRS)
+    do
+        # Remove from both osd.0 and osd.1
+        OSD=$(expr $i % 2)
+        _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
+    done
+    run_osd_bluestore $dir $primary $ceph_osd_args || return 1
+    run_osd_bluestore $dir $other $ceph_osd_args || return 1
+    wait_for_clean || return 1
+
+    repair $pgid
+    wait_for_clean || return 1
+    ceph pg dump pgs
+
+    # This should have caused $REPAIRS objects to be repaired
+    ceph pg $pgid query | jq '.info.stats.stat_sum'
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    test "$COUNT" = "$REPAIRS" || return 1
+
+    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary )"
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired")
+    test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other )"
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired")
+    test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+    test "$COUNT" = "$REPAIRS" || return 1
+
+    # Tear down
+    teardown $dir || return 1
+}
+
+function TEST_repair_stats_ec() {
+    local dir=$1
+    local poolname=testpool
+    local OSDS=3
+    local OBJS=30
+    # This needs to be an even number
+    local REPAIRS=26
+    local allow_overwrites=false
+
+    # Launch a cluster with 5 seconds scrub interval
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+                        --osd-scrub-interval-randomize-ratio=0"
+    for id in $(seq 0 $(expr $OSDS - 1)) ; do
+        run_osd_bluestore $dir $id $ceph_osd_args || return 1
+    done
+
+    # Create an EC pool
+    create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1
+
+    # Put an object
+    local payload=ABCDEF
+    echo $payload > $dir/ORIGINAL
+    for i in $(seq 1 $OBJS)
+    do
+        rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+    done
+
+    # Remove the object from one shard physically
+    # Restarted osd get $ceph_osd_args passed
+    local other=$(get_not_primary $poolname obj1)
+    local pgid=$(get_pg $poolname obj1)
+    local primary=$(get_primary $poolname obj1)
+
+    kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+    kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
     for i in $(seq 1 $REPAIRS)
     do
-        _objectstore_tool_nodown $dir $other obj$i remove || return 1
+        # Remove from both osd.0 and osd.1
+        OSD=$(expr $i % 2)
+        _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
     done
+    run_osd_bluestore $dir $primary $ceph_osd_args || return 1
     run_osd_bluestore $dir $other $ceph_osd_args || return 1
+    wait_for_clean || return 1
 
     repair $pgid
     wait_for_clean || return 1
     ceph pg dump pgs
 
     # This should have caused 1 object to be repaired
+    ceph pg $pgid query | jq '.info.stats.stat_sum'
     COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
     test "$COUNT" = "$REPAIRS" || return 1
 
+    for osd in $(seq 0 $(expr $OSDS - 1)) ; do
+      if [ $osd = $other -o $osd = $primary ]; then
+          repair=$(expr $REPAIRS / 2)
+      else
+          repair="0"
+      fi
+
+      ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd )"
+      COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd ).num_shards_repaired")
+      test "$COUNT" = "$repair" || return 1
+    done
+
+    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+    test "$COUNT" = "$REPAIRS" || return 1
+
     # Tear down
     teardown $dir || return 1
 }
diff --git a/src/messages/MOSDPGPush.h b/src/messages/MOSDPGPush.h
index d7da913306728..3960ad70ea387 100644
--- a/src/messages/MOSDPGPush.h
+++ b/src/messages/MOSDPGPush.h
@@ -21,7 +21,7 @@ class MOSDPGPush : public MessageInstance<MOSDPGPush, MOSDFastDispatchOp> {
 public:
   friend factory;
 private:
-  static constexpr int HEAD_VERSION = 3;
+  static constexpr int HEAD_VERSION = 4;
   static constexpr int COMPAT_VERSION = 2;
 
 public:
@@ -29,6 +29,7 @@ public:
   spg_t pgid;
   epoch_t map_epoch = 0, min_epoch = 0;
   vector<PushOp> pushes;
+  bool is_repair = false;
 
 private:
   uint64_t cost;
@@ -79,6 +80,11 @@
     } else {
       min_epoch = map_epoch;
     }
+    if (header.version >= 4) {
+      decode(is_repair, p);
+    } else {
+      is_repair = false;
+    }
   }
 
   void encode_payload(uint64_t features) override {
@@ -90,6 +96,7 @@ public:
     encode(pgid.shard, payload);
     encode(from, payload);
     encode(min_epoch, payload);
+    encode(is_repair, payload);
   }
 
   std::string_view get_type_name() const override { return "MOSDPGPush"; }
diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index dfa1f4276b27e..36a77cc7417a9 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -285,7 +285,8 @@ struct RecoveryMessages {
 
 void ECBackend::handle_recovery_push(
   const PushOp &op,
-  RecoveryMessages *m)
+  RecoveryMessages *m,
+  bool is_repair)
 {
   if (get_parent()->check_failsafe_full()) {
     dout(10) << __func__ << " Out of space (failsafe) processing push request." << dendl;
@@ -361,6 +362,8 @@ void ECBackend::handle_recovery_push(
   if ((get_parent()->pgb_is_primary())) {
     ceph_assert(recovery_ops.count(op.soid));
     ceph_assert(recovery_ops[op.soid].obc);
+    if (get_parent()->pg_is_repair())
+      get_parent()->inc_osd_stat_repaired();
     get_parent()->on_local_recover(
       op.soid,
       op.recovery_info,
@@ -368,6 +371,9 @@ void ECBackend::handle_recovery_push(
       false,
       &m->t);
   } else {
+    // If primary told us this is a repair, bump osd_stat_t::num_shards_repaired
+    if (is_repair)
+      get_parent()->inc_osd_stat_repaired();
     get_parent()->on_local_recover(
       op.soid,
       op.recovery_info,
@@ -517,6 +523,7 @@ void ECBackend::dispatch_recovery_messages(RecoveryMessages &m, int priority)
     msg->pgid = spg_t(get_parent()->get_info().pgid.pgid, i->first.shard);
     msg->pushes.swap(i->second);
     msg->compute_cost(cct);
+    msg->is_repair = get_parent()->pg_is_repair();
     get_parent()->send_message(
       i->first.osd,
       msg);
@@ -825,7 +832,7 @@ bool ECBackend::_handle_message(
     for (vector<PushOp>::const_iterator i = op->pushes.begin();
          i != op->pushes.end();
          ++i) {
-      handle_recovery_push(*i, &rm);
+      handle_recovery_push(*i, &rm, op->is_repair);
     }
     dispatch_recovery_messages(rm, priority);
     return true;
diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h
index 89d5dcbcb5240..e003a08c73667 100644
--- a/src/osd/ECBackend.h
+++ b/src/osd/ECBackend.h
@@ -306,7 +306,8 @@ private:
     RecoveryMessages *m);
   void handle_recovery_push(
     const PushOp &op,
-    RecoveryMessages *m);
+    RecoveryMessages *m,
+    bool is_repair);
   void handle_recovery_push_reply(
     const PushReplyOp &op,
     pg_shard_t from,
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 020fb437294e1..d32c73b25d568 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -945,6 +945,13 @@ osd_stat_t OSDService::set_osd_stat(vector<int>& hb_peers,
   return osd_stat;
 }
 
+void OSDService::inc_osd_stat_repaired()
+{
+  std::lock_guard l(stat_lock);
+  osd_stat.num_shards_repaired++;
+  return;
+}
+
 float OSDService::compute_adjusted_ratio(osd_stat_t new_stat, float *pratio,
                                          uint64_t adjust_used)
 {
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 12ceefc9f5f0d..ba01a8eb46379 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -907,6 +907,7 @@ public:
   void set_statfs(const struct store_statfs_t &stbuf,
     osd_alert_list_t& alerts);
   osd_stat_t set_osd_stat(vector<int>& hb_peers, int num_pgs);
+  void inc_osd_stat_repaired(void);
   float compute_adjusted_ratio(osd_stat_t new_stat, float *pratio, uint64_t adjust_used = 0);
   osd_stat_t get_osd_stat() {
     std::lock_guard l(stat_lock);
diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h
index 75f895b1b39cb..fa1354c70b269 100644
--- a/src/osd/PGBackend.h
+++ b/src/osd/PGBackend.h
@@ -294,6 +294,8 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
 
      virtual bool check_osdmap_full(const set<pg_shard_t> &missing_on) = 0;
 
+     virtual bool pg_is_repair() = 0;
+     virtual void inc_osd_stat_repaired() = 0;
      virtual bool pg_is_remote_backfilling() = 0;
      virtual void pg_add_local_num_bytes(int64_t num_bytes) = 0;
      virtual void pg_sub_local_num_bytes(int64_t num_bytes) = 0;
diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h
index e5f47b6a6c851..c0f4afb1846ad 100644
--- a/src/osd/PrimaryLogPG.h
+++ b/src/osd/PrimaryLogPG.h
@@ -402,6 +402,12 @@ public:
     release_object_locks(manager);
   }
 
+  bool pg_is_repair() override {
+    return is_repair();
+  }
+  void inc_osd_stat_repaired() override {
+    osd->inc_osd_stat_repaired();
+  }
   bool pg_is_remote_backfilling() override {
     return is_remote_backfilling();
   }
diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc
index 2602bbd24e31a..9614a58997343 100644
--- a/src/osd/ReplicatedBackend.cc
+++ b/src/osd/ReplicatedBackend.cc
@@ -750,7 +750,7 @@ void ReplicatedBackend::_do_push(OpRequestRef op)
        i != m->pushes.end();
        ++i) {
     replies.push_back(PushReplyOp());
-    handle_push(from, *i, &(replies.back()), &t);
+    handle_push(from, *i, &(replies.back()), &t, m->is_repair);
   }
 
   MOSDPGPushReply *reply = new MOSDPGPushReply;
@@ -1726,8 +1726,10 @@ bool ReplicatedBackend::handle_pull_response(
   if (complete) {
     pi.stat.num_objects_recovered++;
     // XXX: This could overcount if regular recovery is needed right after a repair
-    if (get_parent()->pg_is_repair())
+    if (get_parent()->pg_is_repair()) {
       pi.stat.num_objects_repaired++;
+      get_parent()->inc_osd_stat_repaired();
+    }
     clear_pull_from(piter);
     to_continue->push_back({hoid, pi.stat});
     get_parent()->on_local_recover(
@@ -1743,7 +1745,7 @@
 
 void ReplicatedBackend::handle_push(
   pg_shard_t from, const PushOp &pop, PushReplyOp *response,
-  ObjectStore::Transaction *t)
+  ObjectStore::Transaction *t, bool is_repair)
 {
   dout(10) << "handle_push "
            << pop.recovery_info
@@ -1767,13 +1769,18 @@ void ReplicatedBackend::handle_push(
                    pop.omap_entries,
                    t);
 
-  if (complete)
+  if (complete) {
+    if (is_repair) {
+      get_parent()->inc_osd_stat_repaired();
+      dout(20) << __func__ << " repair complete" << dendl;
+    }
     get_parent()->on_local_recover(
       pop.recovery_info.soid,
       pop.recovery_info,
       ObjectContextRef(), // ok, is replica
       false,
       t);
+  }
 }
 
 void ReplicatedBackend::send_pushes(int prio, map<pg_shard_t, vector<PushOp> > &pushes)
@@ -1796,6 +1803,7 @@ void ReplicatedBackend::send_pushes(int prio, map<pg_shard_t, vector<PushOp> > &pushes)
     msg->map_epoch = get_osdmap_epoch();
     msg->min_epoch = get_parent()->get_last_peering_reset_epoch();
     msg->set_priority(prio);
+    msg->is_repair = get_parent()->pg_is_repair();
     for (;
          (j != i->second.end() &&
           cost < cct->_conf->osd_max_push_cost &&
diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h
index c5593080deed0..8f447495a4ed1 100644
--- a/src/osd/ReplicatedBackend.h
+++ b/src/osd/ReplicatedBackend.h
@@ -244,7 +244,7 @@ private:
     list<pull_complete_info> *to_continue,
     ObjectStore::Transaction *t);
   void handle_push(pg_shard_t from, const PushOp &op, PushReplyOp *response,
-                   ObjectStore::Transaction *t);
+                   ObjectStore::Transaction *t, bool is_repair);
 
   static void trim_pushed_data(const interval_set<uint64_t> &copy_subset,
                                const interval_set<uint64_t> &intervals_received,
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 3830167e248b1..af3f0d70a01c3 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -381,6 +381,7 @@ void osd_stat_t::dump(Formatter *f) const
   f->close_section();
   f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
   f->dump_int("num_snap_trimming", num_snap_trimming);
+  f->dump_int("num_shards_repaired", num_shards_repaired);
   f->open_object_section("op_queue_age_hist");
   op_queue_age_hist.dump(f);
   f->close_section();
@@ -394,7 +395,7 @@
 
 void osd_stat_t::encode(bufferlist &bl, uint64_t features) const
 {
-  ENCODE_START(10, 2, bl);
+  ENCODE_START(11, 2, bl);
 
   //////// for compatibility ////////
   int64_t kb = statfs.kb();
@@ -425,6 +426,7 @@ void osd_stat_t::encode(bufferlist &bl, uint64_t features) const
   encode(statfs, bl);
   ///////////////////////////////////
   encode(os_alerts, bl);
+  encode(num_shards_repaired, bl);
   ENCODE_FINISH(bl);
 }
 
@@ -432,7 +434,7 @@ void osd_stat_t::decode(bufferlist::const_iterator &bl)
 {
   int64_t kb, kb_used,kb_avail;
   int64_t kb_used_data, kb_used_omap, kb_used_meta;
-  DECODE_START_LEGACY_COMPAT_LEN(10, 2, 2, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(11, 2, 2, bl);
   decode(kb, bl);
   decode(kb_used, bl);
   decode(kb_avail, bl);
@@ -487,6 +489,11 @@ void osd_stat_t::decode(bufferlist::const_iterator &bl)
   } else {
     os_alerts.clear();
   }
+  if (struct_v >= 11) {
+    decode(num_shards_repaired, bl);
+  } else {
+    num_shards_repaired = 0;
+  }
   DECODE_FINISH(bl);
 }
 
@@ -501,6 +508,7 @@ void osd_stat_t::generate_test_instances(std::list<osd_stat_t*>& o)
   o.back()->hb_peers.push_back(7);
   o.back()->snap_trim_queue_len = 8;
   o.back()->num_snap_trimming = 99;
+  o.back()->num_shards_repaired = 101;
   o.back()->os_alerts[0].emplace(
     "some alert", "some alert details");
   o.back()->os_alerts[1].emplace(
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index fbcc71ba3474f..440d43357d388 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -2321,6 +2321,7 @@ struct osd_stat_t {
   store_statfs_t statfs;
   vector<int> hb_peers;
   int32_t snap_trim_queue_len, num_snap_trimming;
+  uint64_t num_shards_repaired;
 
   pow2_hist_t op_queue_age_hist;
 
@@ -2332,12 +2333,14 @@ struct osd_stat_t {
 
   uint32_t num_pgs = 0;
 
-  osd_stat_t() : snap_trim_queue_len(0), num_snap_trimming(0) {}
+  osd_stat_t() : snap_trim_queue_len(0), num_snap_trimming(0),
+       num_shards_repaired(0) {}
 
   void add(const osd_stat_t& o) {
     statfs.add(o.statfs);
     snap_trim_queue_len += o.snap_trim_queue_len;
     num_snap_trimming += o.num_snap_trimming;
+    num_shards_repaired += o.num_shards_repaired;
     op_queue_age_hist.add(o.op_queue_age_hist);
     os_perf_stat.add(o.os_perf_stat);
     num_pgs += o.num_pgs;
@@ -2352,6 +2355,7 @@ struct osd_stat_t {
     statfs.sub(o.statfs);
     snap_trim_queue_len -= o.snap_trim_queue_len;
     num_snap_trimming -= o.num_snap_trimming;
+    num_shards_repaired -= o.num_shards_repaired;
     op_queue_age_hist.sub(o.op_queue_age_hist);
     os_perf_stat.sub(o.os_perf_stat);
     num_pgs -= o.num_pgs;
@@ -2376,6 +2380,7 @@ inline bool operator==(const osd_stat_t& l, const osd_stat_t& r) {
   return l.statfs == r.statfs &&
     l.snap_trim_queue_len == r.snap_trim_queue_len &&
     l.num_snap_trimming == r.num_snap_trimming &&
+    l.num_shards_repaired == r.num_shards_repaired &&
    l.hb_peers == r.hb_peers &&
    l.op_queue_age_hist == r.op_queue_age_hist &&
    l.os_perf_stat == r.os_perf_stat &&
-- 
2.39.5