From 42497780db7f1535407a644fca408985456e2fbc Mon Sep 17 00:00:00 2001 From: David Zafman Date: Fri, 12 Apr 2019 16:21:12 -0700 Subject: [PATCH] osd: Prevent priority from overflowing in the next base level Signed-off-by: David Zafman --- src/osd/PG.cc | 26 +++++++++++++++++--------- src/osd/PG.h | 2 +- src/osd/osd_types.h | 7 +++++++ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index ba5a2245134..aaeeb158275 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2453,11 +2453,13 @@ bool PG::set_force_backfill(bool b) return did; } -int PG::clamp_recovery_priority(int priority, int pool_recovery_priority) +int PG::clamp_recovery_priority(int priority, int pool_recovery_priority, int max) { static_assert(OSD_RECOVERY_PRIORITY_MIN < OSD_RECOVERY_PRIORITY_MAX, "Invalid priority range"); static_assert(OSD_RECOVERY_PRIORITY_MIN >= 0, "Priority range must match unsigned type"); + ceph_assert(max <= OSD_RECOVERY_PRIORITY_MAX); + // User can't set this too high anymore, but might be a legacy value if (pool_recovery_priority > OSD_POOL_PRIORITY_MAX) pool_recovery_priority = OSD_POOL_PRIORITY_MAX; @@ -2470,8 +2472,8 @@ int PG::clamp_recovery_priority(int priority, int pool_recovery_priority) priority += pool_recovery_priority; // Clamp to valid range - if (priority > OSD_RECOVERY_PRIORITY_MAX) { - return OSD_RECOVERY_PRIORITY_MAX; + if (priority > max) { + return max; } else if (priority < OSD_RECOVERY_PRIORITY_MIN) { return OSD_RECOVERY_PRIORITY_MIN; } else { @@ -2483,20 +2485,22 @@ unsigned PG::get_recovery_priority() { // a higher value -> a higher priority int ret = OSD_RECOVERY_PRIORITY_BASE; + int base = ret; if (state & PG_STATE_FORCED_RECOVERY) { ret = OSD_RECOVERY_PRIORITY_FORCED; } else { // XXX: This priority boost isn't so much about inactive, but about data-at-risk if (is_degraded() && info.stats.avail_no_missing.size() < pool.info.min_size) { + base = OSD_RECOVERY_INACTIVE_PRIORITY_BASE; // inactive: no. of replicas < min_size, highest priority since it blocks IO - ret = OSD_RECOVERY_INACTIVE_PRIORITY_BASE + (pool.info.min_size - info.stats.avail_no_missing.size()); + ret = base + (pool.info.min_size - info.stats.avail_no_missing.size()); } int64_t pool_recovery_priority = 0; pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority); - ret = clamp_recovery_priority(ret, pool_recovery_priority); + ret = clamp_recovery_priority(ret, pool_recovery_priority, max_prio_map[base]); } dout(20) << __func__ << " recovery priority is " << ret << dendl; return static_cast(ret); @@ -2506,28 +2510,32 @@ unsigned PG::get_backfill_priority() { // a higher value -> a higher priority int ret = OSD_BACKFILL_PRIORITY_BASE; + int base = ret; + if (state & PG_STATE_FORCED_BACKFILL) { ret = OSD_BACKFILL_PRIORITY_FORCED; } else { if (acting.size() < pool.info.min_size) { + base = OSD_BACKFILL_INACTIVE_PRIORITY_BASE; // inactive: no. of replicas < min_size, highest priority since it blocks IO - ret = OSD_BACKFILL_INACTIVE_PRIORITY_BASE + (pool.info.min_size - acting.size()); + ret = base + (pool.info.min_size - acting.size()); } else if (is_undersized()) { // undersized: OSD_BACKFILL_DEGRADED_PRIORITY_BASE + num missing replicas ceph_assert(pool.info.size > actingset.size()); - ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE + (pool.info.size - actingset.size()); + base = OSD_BACKFILL_DEGRADED_PRIORITY_BASE; + ret = base + (pool.info.size - actingset.size()); } else if (is_degraded()) { // degraded: baseline degraded - ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE; + base = ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE; } // Adjust with pool's recovery priority int64_t pool_recovery_priority = 0; pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority); - ret = clamp_recovery_priority(ret, pool_recovery_priority); + ret = clamp_recovery_priority(ret, pool_recovery_priority, max_prio_map[base]); } dout(20) << __func__ << " backfill priority is " << ret << dendl; diff --git a/src/osd/PG.h b/src/osd/PG.h index 5a423452cb5..869d229954c 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1432,7 +1432,7 @@ protected: bool needs_backfill() const; /// clip calculated priority to reasonable range - int clamp_recovery_priority(int prio, int pool_recovery_prio); + int clamp_recovery_priority(int prio, int pool_recovery_prio, int max); /// get log recovery reservation priority unsigned get_recovery_priority(); /// get backfill reservation priority diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index b329bbdd779..9b9bdd23062 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -108,6 +108,13 @@ /// priority when more full #define OSD_DELETE_PRIORITY_FULL 255 +static std::map max_prio_map = { + {OSD_BACKFILL_PRIORITY_BASE, OSD_BACKFILL_DEGRADED_PRIORITY_BASE - 1}, + {OSD_BACKFILL_DEGRADED_PRIORITY_BASE, OSD_RECOVERY_PRIORITY_BASE - 1}, + {OSD_RECOVERY_PRIORITY_BASE, OSD_BACKFILL_INACTIVE_PRIORITY_BASE - 1}, + {OSD_RECOVERY_INACTIVE_PRIORITY_BASE, OSD_RECOVERY_PRIORITY_MAX}, + {OSD_BACKFILL_INACTIVE_PRIORITY_BASE, OSD_RECOVERY_PRIORITY_MAX} +}; typedef hobject_t collection_list_handle_t; -- 2.39.5