From c64327f3a85e29400b0b7b8ae40f5256d074966f Mon Sep 17 00:00:00 2001 From: David Zafman Date: Wed, 10 Apr 2019 18:45:32 -0700 Subject: [PATCH] osd: For recovery track OSDs that have 0 missing to know how degraded we are Add backfill priority log message and remove redundancy to recovery priority Fixes: https://tracker.ceph.com/issues/39099 Signed-off-by: David Zafman (cherry picked from commit f0e7202cffed6e4adf54513891f1deeea2341d81) Conflicts: src/osd/osd_types.h - nautilus does not have 75014ceb1437c5bb48293574ec6f991e4bec64bb --- src/osd/PG.cc | 22 ++++++++++++++++++---- src/osd/osd_types.cc | 14 ++++++++++++-- src/osd/osd_types.h | 4 ++++ 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index f7f536265c433..8e8fceafdc50e 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2473,15 +2473,23 @@ inline int PG::clamp_recovery_priority(int priority) unsigned PG::get_recovery_priority() { // a higher value -> a higher priority - int64_t ret = 0; + int ret = OSD_RECOVERY_PRIORITY_BASE; if (state & PG_STATE_FORCED_RECOVERY) { ret = OSD_RECOVERY_PRIORITY_FORCED; } else { - pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &ret); - ret = clamp_recovery_priority(OSD_RECOVERY_PRIORITY_BASE + ret); + // XXX: This priority boost isn't so much about inactive, but about data-at-risk + if (is_degraded() && info.stats.avail_no_missing.size() < pool.info.min_size) { + // inactive: no. 
of replicas < min_size, highest priority since it blocks IO + ret = OSD_RECOVERY_INACTIVE_PRIORITY_BASE + (pool.info.min_size - info.stats.avail_no_missing.size()); + } + + int64_t pool_recovery_priority = 0; + pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority); + + ret = clamp_recovery_priority(pool_recovery_priority + ret); } - dout(20) << __func__ << " recovery priority for " << *this << " is " << ret << ", state is " << state << dendl; + dout(20) << __func__ << " recovery priority is " << ret << dendl; return static_cast(ret); } @@ -2513,6 +2521,7 @@ unsigned PG::get_backfill_priority() ret = clamp_recovery_priority(pool_recovery_priority + ret); } + dout(20) << __func__ << " backfill priority is " << ret << dendl; return static_cast(ret); } @@ -3203,6 +3212,7 @@ void PG::_update_calc_stats() info.stats.stats.sum.num_objects_degraded = 0; info.stats.stats.sum.num_objects_unfound = 0; info.stats.stats.sum.num_objects_misplaced = 0; + info.stats.avail_no_missing.clear(); if ((is_remapped() || is_undersized() || !is_clean()) && (is_peered() || is_activating())) { dout(20) << __func__ << " actingset " << actingset << " upset " @@ -3236,6 +3246,8 @@ void PG::_update_calc_stats() acting_source_objects.insert(make_pair(missing, pg_whoami)); } info.stats.stats.sum.num_objects_missing_on_primary = missing; + if (missing == 0) + info.stats.avail_no_missing.push_back(pg_whoami); dout(20) << __func__ << " shard " << pg_whoami << " primary objects " << num_objects << " missing " << missing @@ -3269,6 +3281,8 @@ void PG::_update_calc_stats() acting_source_objects.insert(make_pair(missing, peer.first)); } peer.second.stats.stats.sum.num_objects_missing = missing; + if (missing == 0) + info.stats.avail_no_missing.push_back(peer.first); dout(20) << __func__ << " shard " << peer.first << " objects " << peer_num_objects << " missing " << missing diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index d114e1e473ba8..ba00dd54ba4f0 100644 --- 
a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2641,6 +2641,10 @@ void pg_stat_t::dump(Formatter *f) const for (vector::const_iterator p = acting.begin(); p != acting.end(); ++p) f->dump_int("osd", *p); f->close_section(); + f->open_array_section("avail_no_missing"); + for (auto p = avail_no_missing.cbegin(); p != avail_no_missing.cend(); ++p) + f->dump_stream("shard") << *p; + f->close_section(); f->open_array_section("blocked_by"); for (vector::const_iterator p = blocked_by.begin(); p != blocked_by.end(); ++p) @@ -2677,7 +2681,7 @@ void pg_stat_t::dump_brief(Formatter *f) const void pg_stat_t::encode(bufferlist &bl) const { - ENCODE_START(25, 22, bl); + ENCODE_START(26, 22, bl); encode(version, bl); encode(reported_seq, bl); encode(reported_epoch, bl); @@ -2723,6 +2727,7 @@ void pg_stat_t::encode(bufferlist &bl) const encode(top_state, bl); encode(purged_snaps, bl); encode(manifest_stats_invalid, bl); + encode(avail_no_missing, bl); ENCODE_FINISH(bl); } @@ -2730,7 +2735,7 @@ void pg_stat_t::decode(bufferlist::const_iterator &bl) { bool tmp; uint32_t old_state; - DECODE_START(25, bl); + DECODE_START(26, bl); decode(version, bl); decode(reported_seq, bl); decode(reported_epoch, bl); @@ -2793,6 +2798,9 @@ void pg_stat_t::decode(bufferlist::const_iterator &bl) } else { manifest_stats_invalid = true; } + if (struct_v >= 26) { + decode(avail_no_missing, bl); + } } DECODE_FINISH(bl); } @@ -2834,6 +2842,7 @@ void pg_stat_t::generate_test_instances(list& o) a.up.push_back(123); a.up_primary = 123; a.acting.push_back(456); + a.avail_no_missing.push_back(pg_shard_t(456, shard_id_t::NO_SHARD)); a.acting_primary = 456; o.push_back(new pg_stat_t(a)); @@ -2878,6 +2887,7 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r) l.ondisk_log_size == r.ondisk_log_size && l.up == r.up && l.acting == r.acting && + l.avail_no_missing == r.avail_no_missing && l.mapping_epoch == r.mapping_epoch && l.blocked_by == r.blocked_by && l.last_became_active == r.last_became_active && 
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c5fa0ca7dc38e..90b4edea6181a 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -82,6 +82,9 @@ /// base backfill priority for MBackfillReserve (inactive PG) #define OSD_BACKFILL_INACTIVE_PRIORITY_BASE 220 +/// base recovery priority for MRecoveryReserve (inactive PG) +#define OSD_RECOVERY_INACTIVE_PRIORITY_BASE 220 + /// max manually/automatically set recovery priority for MBackfillReserve #define OSD_RECOVERY_PRIORITY_MAX 253 @@ -2078,6 +2081,7 @@ struct pg_stat_t { int64_t ondisk_log_size; // >= active_log_size vector up, acting; + vector avail_no_missing; epoch_t mapping_epoch; vector blocked_by; ///< osds on which the pg is blocked -- 2.39.5