From: David Zafman Date: Thu, 11 Apr 2019 01:45:32 +0000 (-0700) Subject: osd: For recovery track OSDs that have 0 missing to know how degraded we are X-Git-Tag: v15.1.0~2828^2~8 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f0e7202cffed6e4adf54513891f1deeea2341d81;p=ceph.git osd: For recovery track OSDs that have 0 missing to know how degraded we are Add backfill priority log message and remove redundancy to recovery priority Fixes: https://tracker.ceph.com/issues/39099 Signed-off-by: David Zafman --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index ffdce25ab22..b4ad94181d6 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2471,15 +2471,23 @@ inline int PG::clamp_recovery_priority(int priority) unsigned PG::get_recovery_priority() { // a higher value -> a higher priority - int64_t ret = 0; + int ret = OSD_RECOVERY_PRIORITY_BASE; if (state & PG_STATE_FORCED_RECOVERY) { ret = OSD_RECOVERY_PRIORITY_FORCED; } else { - pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &ret); - ret = clamp_recovery_priority(OSD_RECOVERY_PRIORITY_BASE + ret); + // XXX: This priority boost isn't so much about inactive, but about data-at-risk + if (is_degraded() && info.stats.avail_no_missing.size() < pool.info.min_size) { + // inactive: no. 
of replicas < min_size, highest priority since it blocks IO + ret = OSD_RECOVERY_INACTIVE_PRIORITY_BASE + (pool.info.min_size - info.stats.avail_no_missing.size()); + } + + int64_t pool_recovery_priority = 0; + pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority); + + ret = clamp_recovery_priority(pool_recovery_priority + ret); } - dout(20) << __func__ << " recovery priority for " << *this << " is " << ret << ", state is " << state << dendl; + dout(20) << __func__ << " recovery priority is " << ret << dendl; return static_cast<unsigned>(ret); } @@ -2511,6 +2519,7 @@ unsigned PG::get_backfill_priority() ret = clamp_recovery_priority(pool_recovery_priority + ret); } + dout(20) << __func__ << " backfill priority is " << ret << dendl; return static_cast<unsigned>(ret); } @@ -3201,6 +3210,7 @@ void PG::_update_calc_stats() info.stats.stats.sum.num_objects_degraded = 0; info.stats.stats.sum.num_objects_unfound = 0; info.stats.stats.sum.num_objects_misplaced = 0; + info.stats.avail_no_missing.clear(); if ((is_remapped() || is_undersized() || !is_clean()) && (is_peered() || is_activating())) { dout(20) << __func__ << " actingset " << actingset << " upset " @@ -3234,6 +3244,8 @@ void PG::_update_calc_stats() acting_source_objects.emplace(missing, pg_whoami); } info.stats.stats.sum.num_objects_missing_on_primary = missing; + if (missing == 0) + info.stats.avail_no_missing.push_back(pg_whoami); dout(20) << __func__ << " shard " << pg_whoami << " primary objects " << num_objects << " missing " << missing @@ -3267,6 +3279,8 @@ void PG::_update_calc_stats() acting_source_objects.emplace(missing, peer.first); } peer.second.stats.stats.sum.num_objects_missing = missing; + if (missing == 0) + info.stats.avail_no_missing.push_back(peer.first); dout(20) << __func__ << " shard " << peer.first << " objects " << peer_num_objects << " missing " << missing diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index cc3ec866e43..58848a39937 100644 --- a/src/osd/osd_types.cc +++ 
b/src/osd/osd_types.cc @@ -2667,6 +2667,10 @@ void pg_stat_t::dump(Formatter *f) const for (auto p = acting.cbegin(); p != acting.cend(); ++p) f->dump_int("osd", *p); f->close_section(); + f->open_array_section("avail_no_missing"); + for (auto p = avail_no_missing.cbegin(); p != avail_no_missing.cend(); ++p) + f->dump_stream("shard") << *p; + f->close_section(); f->open_array_section("blocked_by"); for (auto p = blocked_by.cbegin(); p != blocked_by.cend(); ++p) f->dump_int("osd", *p); @@ -2700,7 +2704,7 @@ void pg_stat_t::dump_brief(Formatter *f) const void pg_stat_t::encode(ceph::buffer::list &bl) const { - ENCODE_START(25, 22, bl); + ENCODE_START(26, 22, bl); encode(version, bl); encode(reported_seq, bl); encode(reported_epoch, bl); @@ -2746,6 +2750,7 @@ void pg_stat_t::encode(ceph::buffer::list &bl) const encode(top_state, bl); encode(purged_snaps, bl); encode(manifest_stats_invalid, bl); + encode(avail_no_missing, bl); ENCODE_FINISH(bl); } @@ -2753,7 +2758,7 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl) { bool tmp; uint32_t old_state; - DECODE_START(25, bl); + DECODE_START(26, bl); decode(version, bl); decode(reported_seq, bl); decode(reported_epoch, bl); @@ -2816,6 +2821,9 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl) } else { manifest_stats_invalid = true; } + if (struct_v >= 26) { + decode(avail_no_missing, bl); + } } DECODE_FINISH(bl); } @@ -2857,6 +2865,7 @@ void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o) a.up.push_back(123); a.up_primary = 123; a.acting.push_back(456); + a.avail_no_missing.push_back(pg_shard_t(456, shard_id_t::NO_SHARD)); a.acting_primary = 456; o.push_back(new pg_stat_t(a)); @@ -2901,6 +2910,7 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r) l.ondisk_log_size == r.ondisk_log_size && l.up == r.up && l.acting == r.acting && + l.avail_no_missing == r.avail_no_missing && l.mapping_epoch == r.mapping_epoch && l.blocked_by == r.blocked_by && l.last_became_active == r.last_became_active && diff 
--git a/src/osd/osd_types.h b/src/osd/osd_types.h index 1d59a85fd9d..85cdd89014b 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -83,6 +83,9 @@ /// base backfill priority for MBackfillReserve (inactive PG) #define OSD_BACKFILL_INACTIVE_PRIORITY_BASE 220 +/// base recovery priority for MRecoveryReserve (inactive PG) +#define OSD_RECOVERY_INACTIVE_PRIORITY_BASE 220 + /// max manually/automatically set recovery priority for MBackfillReserve #define OSD_RECOVERY_PRIORITY_MAX 253 @@ -2076,6 +2079,7 @@ struct pg_stat_t { int64_t ondisk_log_size; // >= active_log_size std::vector<int32_t> up, acting; + std::vector<pg_shard_t> avail_no_missing; epoch_t mapping_epoch; std::vector<int32_t> blocked_by; ///< osds on which the pg is blocked