// Compute the priority this PG uses for recovery; a higher value means a higher priority.
// - If the PG_STATE_FORCED_RECOVERY bit is set in `state`, OSD_RECOVERY_PRIORITY_FORCED is
//   returned as-is (the clamp below is only applied in the else branch).
// - Otherwise the pool's RECOVERY_PRIORITY option is added to a base value and the sum is
//   passed through clamp_recovery_priority(). In the added (+) version the base is
//   OSD_RECOVERY_PRIORITY_BASE, or OSD_RECOVERY_INACTIVE_PRIORITY_BASE plus the shortfall
//   when the PG is degraded and fewer than min_size shards are listed in avail_no_missing.
// NOTE(review): the pool option is added after the boost, so a negative pool value could
// pull a boosted PG below OSD_RECOVERY_INACTIVE_PRIORITY_BASE unless
// clamp_recovery_priority() prevents it -- confirm against the clamp implementation.
unsigned PG::get_recovery_priority()
{
// a higher value -> a higher priority
- int64_t ret = 0;
+ int ret = OSD_RECOVERY_PRIORITY_BASE; // default base; overwritten below for forced or below-min_size PGs
if (state & PG_STATE_FORCED_RECOVERY) {
ret = OSD_RECOVERY_PRIORITY_FORCED; // forced-recovery bit set: pool option and clamp are skipped
} else {
- pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &ret);
- ret = clamp_recovery_priority(OSD_RECOVERY_PRIORITY_BASE + ret);
+ // XXX: This priority boost isn't so much about inactive, but about data-at-risk
+ if (is_degraded() && info.stats.avail_no_missing.size() < pool.info.min_size) {
+ // inactive: no. of replicas < min_size, highest priority since it blocks IO
+ ret = OSD_RECOVERY_INACTIVE_PRIORITY_BASE + (pool.info.min_size - info.stats.avail_no_missing.size()); // difference is > 0 by the enclosing condition
+ }
+
+ int64_t pool_recovery_priority = 0; // presumably stays 0 when the pool option is unset -- confirm opts.get() semantics
+ pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
+
+ ret = clamp_recovery_priority(pool_recovery_priority + ret); // pool adjustment applied on top of whichever base was chosen above
}
- dout(20) << __func__ << " recovery priority for " << *this << " is " << ret << ", state is " << state << dendl;
+ dout(20) << __func__ << " recovery priority is " << ret << dendl;
return static_cast<unsigned>(ret);
}
ret = clamp_recovery_priority(pool_recovery_priority + ret);
}
+ dout(20) << __func__ << " backfill priority is " << ret << dendl;
return static_cast<unsigned>(ret);
}
info.stats.stats.sum.num_objects_degraded = 0;
info.stats.stats.sum.num_objects_unfound = 0;
info.stats.stats.sum.num_objects_misplaced = 0;
+ info.stats.avail_no_missing.clear();
if ((is_remapped() || is_undersized() || !is_clean()) && (is_peered() || is_activating())) {
dout(20) << __func__ << " actingset " << actingset << " upset "
acting_source_objects.insert(make_pair(missing, pg_whoami));
}
info.stats.stats.sum.num_objects_missing_on_primary = missing;
+ if (missing == 0)
+ info.stats.avail_no_missing.push_back(pg_whoami);
dout(20) << __func__ << " shard " << pg_whoami
<< " primary objects " << num_objects
<< " missing " << missing
acting_source_objects.insert(make_pair(missing, peer.first));
}
peer.second.stats.stats.sum.num_objects_missing = missing;
+ if (missing == 0)
+ info.stats.avail_no_missing.push_back(peer.first);
dout(20) << __func__ << " shard " << peer.first
<< " objects " << peer_num_objects
<< " missing " << missing
for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
f->dump_int("osd", *p);
f->close_section();
+ f->open_array_section("avail_no_missing");
+ for (auto p = avail_no_missing.cbegin(); p != avail_no_missing.cend(); ++p)
+ f->dump_stream("shard") << *p;
+ f->close_section();
f->open_array_section("blocked_by");
for (vector<int32_t>::const_iterator p = blocked_by.begin();
p != blocked_by.end(); ++p)
// Serialize this pg_stat_t into bl.
// The change bumps the first ENCODE_START argument from 25 to 26 (the second stays 22) and
// encodes avail_no_missing as the final field before ENCODE_FINISH; the matching decode
// reads that field only when struct_v >= 26.
// NOTE(review): this excerpt may not show every encoded field -- confirm against the full function.
void pg_stat_t::encode(bufferlist &bl) const
{
- ENCODE_START(25, 22, bl);
+ ENCODE_START(26, 22, bl);
encode(version, bl);
encode(reported_seq, bl);
encode(reported_epoch, bl);
encode(top_state, bl);
encode(purged_snaps, bl);
encode(manifest_stats_invalid, bl);
+ encode(avail_no_missing, bl); // new field, written last; decode must read it in the same position
ENCODE_FINISH(bl);
}
{
bool tmp;
uint32_t old_state;
- DECODE_START(25, bl);
+ DECODE_START(26, bl);
decode(version, bl);
decode(reported_seq, bl);
decode(reported_epoch, bl);
} else {
manifest_stats_invalid = true;
}
+ if (struct_v >= 26) {
+ decode(avail_no_missing, bl);
+ }
}
DECODE_FINISH(bl);
}
a.up.push_back(123);
a.up_primary = 123;
a.acting.push_back(456);
+ a.avail_no_missing.push_back(pg_shard_t(456, shard_id_t::NO_SHARD));
a.acting_primary = 456;
o.push_back(new pg_stat_t(a));
l.ondisk_log_size == r.ondisk_log_size &&
l.up == r.up &&
l.acting == r.acting &&
+ l.avail_no_missing == r.avail_no_missing &&
l.mapping_epoch == r.mapping_epoch &&
l.blocked_by == r.blocked_by &&
l.last_became_active == r.last_became_active &&
/// base backfill priority for MBackfillReserve (inactive PG)
#define OSD_BACKFILL_INACTIVE_PRIORITY_BASE 220
+/// base recovery priority for MRecoveryReserve (inactive PG); same value (220) as OSD_BACKFILL_INACTIVE_PRIORITY_BASE
+#define OSD_RECOVERY_INACTIVE_PRIORITY_BASE 220
+
/// max manually/automatically set recovery priority for MBackfillReserve
#define OSD_RECOVERY_PRIORITY_MAX 253
int64_t ondisk_log_size; // >= active_log_size
vector<int32_t> up, acting;
+ vector<pg_shard_t> avail_no_missing;
epoch_t mapping_epoch;
vector<int32_t> blocked_by; ///< osds on which the pg is blocked