``recovery_priority``
-:Description: When a value is set it will boost the computed reservation priority
- by this amount. This value should be less than 30.
+:Description: When a value is set, it will increase or decrease the computed
+ reservation priority. This value must be in the range -10 to
+ 10. Use a negative priority for less important pools so they
+ have lower priority than any new pools (which default to 0).
:Type: Integer
:Default: ``0``
export objects=50
export poolprefix=test
export FORCE_PRIO="254" # See OSD_BACKFILL_PRIORITY_FORCED
- export DEGRADED_PRIO="140" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE
- export NORMAL_PRIO="100" # See OSD_BACKFILL_PRIORITY_BASE
+ export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10
+ export NORMAL_PRIO="110" # See OSD_BACKFILL_PRIORITY_BASE + 10
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
export objects=200
export poolprefix=test
export FORCE_PRIO="255" # See OSD_RECOVERY_PRIORITY_FORCED
- export NORMAL_PRIO="180" # See OSD_RECOVERY_PRIORITY_BASE
+ export NORMAL_PRIO="190" # See OSD_RECOVERY_PRIORITY_BASE + 10
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.'
ceph osd pool set $TEST_POOL_GETSET recovery_priority 5
ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: 5'
+ ceph osd pool set $TEST_POOL_GETSET recovery_priority -5
+ ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: -5'
ceph osd pool set $TEST_POOL_GETSET recovery_priority 0
ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.'
- expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority -1
- expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority 30
+ expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority -11
+ expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority 11
ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.'
ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 5
if (mon->is_leader()) {
mon->clog->debug() << "osdmap " << osdmap;
+ if (!priority_convert) {
+ // Only do this once at start-up
+ convert_pool_priorities();
+ priority_convert = true;
+ }
} else {
list<MonOpRequestRef> ls;
take_all_failures(ls);
ss << "error parsing int value '" << val << "': " << interr;
return -EINVAL;
}
- if (n < 0) {
- ss << "pool recovery_priority can not be negative";
- return -EINVAL;
- } else if (n >= 30) {
- ss << "pool recovery_priority should be less than 30 due to "
- << "Ceph internal implementation restrictions";
+ if (n > OSD_POOL_PRIORITY_MAX || n < OSD_POOL_PRIORITY_MIN) {
+ ss << "pool recovery_priority must be between " << OSD_POOL_PRIORITY_MIN
+ << " and " << OSD_POOL_PRIORITY_MAX;
return -EINVAL;
}
} else if (var == "pg_autoscale_bias") {
ret, epoch, get_last_committed(), blp);
mon->send_reply(op, reply);
}
+
+/// Rescale per-pool "recovery_priority" option values that fall outside
+/// OSD_POOL_PRIORITY_MIN..OSD_POOL_PRIORITY_MAX.
+///
+/// If any pool exceeds the upper bound, every positive value is scaled
+/// proportionally so the largest one maps to OSD_POOL_PRIORITY_MAX; if any
+/// pool is below the lower bound, every negative value is scaled so the
+/// smallest one maps to OSD_POOL_PRIORITY_MIN.  A value that scales to 0
+/// has the option removed.  Each adjusted pool is staged in
+/// pending_inc.new_pools with last_change set to pending_inc.epoch.
+void OSDMonitor::convert_pool_priorities(void)
+{
+  const pool_opts_t::key_t opt_key =
+    pool_opts_t::get_opt_desc("recovery_priority").key;
+
+  // Pass 1: find the extreme values in use.  Both start at 0, so pools
+  // without the option cannot widen the observed range.
+  int64_t highest = 0;
+  int64_t lowest = 0;
+  for (const auto &entry : osdmap.get_pools()) {
+    const auto &opts = entry.second.opts;
+    if (!opts.is_set(opt_key))
+      continue;
+    int64_t value = 0;
+    opts.get(opt_key, &value);
+    if (value > highest)
+      highest = value;
+    if (value < lowest)
+      lowest = value;
+  }
+
+  const bool too_high = highest > OSD_POOL_PRIORITY_MAX;
+  const bool too_low = lowest < OSD_POOL_PRIORITY_MIN;
+  if (!too_high && !too_low) {
+    dout(20) << __func__ << " nothing to fix" << dendl;
+    return;
+  }
+
+  // Pass 2: at least one pool priority exceeds the allowed range, so
+  // rescale the affected side(s).
+  for (const auto &entry : osdmap.get_pools()) {
+    const auto pool_id = entry.first;
+
+    // Stays 0 when the option is not set, which falls through to the
+    // "leave untouched" case below.
+    int64_t old_prio = 0;
+    entry.second.opts.get(opt_key, &old_prio);
+
+    int64_t scaled;
+    if (old_prio > 0 && too_high) { // Likely scenario
+      // Scaled priority range 0 to OSD_POOL_PRIORITY_MAX
+      scaled = static_cast<int64_t>(
+        static_cast<float>(old_prio) / highest * OSD_POOL_PRIORITY_MAX);
+    } else if (old_prio < 0 && too_low) {
+      // Scaled priority range OSD_POOL_PRIORITY_MIN to 0
+      scaled = static_cast<int64_t>(
+        static_cast<float>(old_prio) / lowest * OSD_POOL_PRIORITY_MIN);
+    } else {
+      continue;
+    }
+
+    // Work on a private copy; the osdmap itself is only read here.
+    pg_pool_t updated = entry.second;
+    if (scaled == 0) {
+      updated.opts.unset(opt_key);
+    } else {
+      updated.opts.set(opt_key, scaled);
+    }
+    dout(10) << __func__ << " pool " << pool_id
+             << " recovery_priority adjusted "
+             << old_prio << " to " << scaled << dendl;
+    updated.last_change = pending_inc.epoch;
+    pending_inc.new_pools[pool_id] = updated;
+  }
+}
set<int> pending_metadata_rm;
map<int, failure_info_t> failure_info;
map<int,utime_t> down_pending_out; // osd down -> out
+ bool priority_convert = false;
map<int,double> osd_weight;
pending_inc.new_flags &= ~flag;
}
}
+ void convert_pool_priorities(void);
};
#endif
return did;
}
-inline int PG::clamp_recovery_priority(int priority)
+int PG::clamp_recovery_priority(int priority, int pool_recovery_priority)
{
static_assert(OSD_RECOVERY_PRIORITY_MIN < OSD_RECOVERY_PRIORITY_MAX, "Invalid priority range");
static_assert(OSD_RECOVERY_PRIORITY_MIN >= 0, "Priority range must match unsigned type");
+ // User can't set this too high anymore, but might be a legacy value
+ if (pool_recovery_priority > OSD_POOL_PRIORITY_MAX)
+ pool_recovery_priority = OSD_POOL_PRIORITY_MAX;
+ if (pool_recovery_priority < OSD_POOL_PRIORITY_MIN)
+ pool_recovery_priority = OSD_POOL_PRIORITY_MIN;
+ // Shift range [min, max] to [0, max - min]
+ pool_recovery_priority += (0 - OSD_POOL_PRIORITY_MIN);
+ ceph_assert(pool_recovery_priority >= 0 && pool_recovery_priority <= (OSD_POOL_PRIORITY_MAX - OSD_POOL_PRIORITY_MIN));
+
+ priority += pool_recovery_priority;
+
// Clamp to valid range
if (priority > OSD_RECOVERY_PRIORITY_MAX) {
return OSD_RECOVERY_PRIORITY_MAX;
int64_t pool_recovery_priority = 0;
pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
- ret = clamp_recovery_priority(pool_recovery_priority + ret);
+ ret = clamp_recovery_priority(ret, pool_recovery_priority);
}
dout(20) << __func__ << " recovery priority is " << ret << dendl;
return static_cast<unsigned>(ret);
int64_t pool_recovery_priority = 0;
pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
- ret = clamp_recovery_priority(pool_recovery_priority + ret);
+ ret = clamp_recovery_priority(ret, pool_recovery_priority);
}
dout(20) << __func__ << " backfill priority is " << ret << dendl;
bool needs_backfill() const;
/// clip calculated priority to reasonable range
- inline int clamp_recovery_priority(int priority);
+ int clamp_recovery_priority(int prio, int pool_recovery_prio);
/// get log recovery reservation priority
unsigned get_recovery_priority();
/// get backfill reservation priority
#define CEPH_OSD_FEATURE_INCOMPAT_RECOVERY_DELETES CompatSet::Feature(16, "deletes in missing set")
+/// pool priority range set by user
+/// (MIN is the negation of MAX; its replacement list is parenthesized so
+/// it always expands as a single operand)
+#define OSD_POOL_PRIORITY_MAX 10
+#define OSD_POOL_PRIORITY_MIN (-OSD_POOL_PRIORITY_MAX)
+
/// min recovery priority for MBackfillReserve
#define OSD_RECOVERY_PRIORITY_MIN 0