From: Bartłomiej Święcki Date: Fri, 2 Dec 2016 15:54:46 +0000 (+0100) Subject: osd: Increase priority for inactive PGs backfill X-Git-Tag: v11.1.1~84^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=6a76adcdb1f92c136841d960aa7cd4e5b94addec;p=ceph-ci.git osd: Increase priority for inactive PGs backfill This change does prioritize backfill of PGs which don't have min_size active copies. Such PGs would cause IO stalls for clients and would increase throttlers usage. This change also fixes a few subtle out-of-bounds bugs. Signed-off-by: Bartłomiej Święcki --- diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 54befe629c9..72f3a2f9acd 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -1,3 +1,9 @@ 11.1.1 ------ + * Calculation of recovery priorities has been updated. + This could lead to unintuitive recovery prioritization + during cluster upgrade. In case of such recovery, OSDs + in old version would operate on different priority ranges + than new ones. Once upgraded, cluster will operate on + consistent values. 
diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 1d2857b84cc..bb15592206a 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2125,18 +2125,31 @@ unsigned PG::get_recovery_priority() int pool_recovery_priority = 0; pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority); - unsigned ret = OSD_RECOVERY_PRIORITY_BASE + pool_recovery_priority; - if (ret > OSD_RECOVERY_PRIORITY_MAX) + int ret = OSD_RECOVERY_PRIORITY_BASE + pool_recovery_priority; + + // Clamp to valid range + if (ret > OSD_RECOVERY_PRIORITY_MAX) { ret = OSD_RECOVERY_PRIORITY_MAX; - return ret; + } else if (ret < OSD_RECOVERY_PRIORITY_MIN) { + ret = OSD_RECOVERY_PRIORITY_MIN; + } + + static_assert(OSD_RECOVERY_PRIORITY_MIN < OSD_RECOVERY_PRIORITY_MAX, "Invalid priority range"); + static_assert(OSD_RECOVERY_PRIORITY_MIN >= 0, "Priority range must match unsigned type"); + + return static_cast<unsigned>(ret); } unsigned PG::get_backfill_priority() { // a higher value -> a higher priority - unsigned ret = OSD_BACKFILL_PRIORITY_BASE; - if (is_undersized()) { + int ret = OSD_BACKFILL_PRIORITY_BASE; + if (acting.size() < pool.info.min_size) { + // inactive: no. 
of replicas < min_size, highest priority since it blocks IO + ret = OSD_BACKFILL_INACTIVE_PRIORITY_BASE + (pool.info.min_size - acting.size()); + + } else if (is_undersized()) { // undersized: OSD_BACKFILL_DEGRADED_PRIORITY_BASE + num missing replicas assert(pool.info.size > actingset.size()); ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE + (pool.info.size - actingset.size()); @@ -2145,9 +2158,20 @@ unsigned PG::get_backfill_priority() // degraded: baseline degraded ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE; } - assert (ret < OSD_RECOVERY_PRIORITY_MAX); - return ret; + // Adjust with pool's recovery priority + int pool_recovery_priority = 0; + pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority); + ret += pool_recovery_priority; + + // Clamp to valid range + if (ret > OSD_RECOVERY_PRIORITY_MAX) { + ret = OSD_RECOVERY_PRIORITY_MAX; + } else if (ret < OSD_RECOVERY_PRIORITY_MIN) { + ret = OSD_RECOVERY_PRIORITY_MIN; + } + + return static_cast<unsigned>(ret); } void PG::finish_recovery(list<Context*>& tfin) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index b9112bda7cb..6581740b292 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -66,17 +66,24 @@ #define CEPH_OSD_FEATURE_INCOMPAT_FASTINFO CompatSet::Feature(15, "fastinfo pg attr") -/// max recovery priority for MBackfillReserve -#define OSD_RECOVERY_PRIORITY_MAX 255u +/// min recovery priority for MBackfillReserve +#define OSD_RECOVERY_PRIORITY_MIN 0 -/// base recovery priority for MBackfillReserve -#define OSD_RECOVERY_PRIORITY_BASE 230u +/// base backfill priority for MBackfillReserve +#define OSD_BACKFILL_PRIORITY_BASE 100 /// base backfill priority for MBackfillReserve (degraded PG) -#define OSD_BACKFILL_DEGRADED_PRIORITY_BASE 200u +#define OSD_BACKFILL_DEGRADED_PRIORITY_BASE 140 + +/// base recovery priority for MBackfillReserve +#define OSD_RECOVERY_PRIORITY_BASE 180 + +/// base backfill priority for MBackfillReserve (inactive PG) +#define OSD_BACKFILL_INACTIVE_PRIORITY_BASE 220 + +/// max 
recovery priority for MBackfillReserve +#define OSD_RECOVERY_PRIORITY_MAX 255 -/// base backfill priority for MBackfillReserve -#define OSD_BACKFILL_PRIORITY_BASE 1u typedef hobject_t collection_list_handle_t;