function TEST_repair_stats() {
local dir=$1
local poolname=testpool
+ local OSDS=2
local OBJS=30
+ # This needs to be an even number so the removals split evenly between two OSDs
local REPAIRS=20
# Launch a cluster with 5 seconds scrub interval
run_mgr $dir x || return 1
local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
- for id in $(seq 0 2) ; do
+ for id in $(seq 0 $(expr $OSDS - 1)) ; do
run_osd_bluestore $dir $id $ceph_osd_args || return 1
done
local primary=$(get_primary $poolname obj1)
kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+ kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
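+ # Remove $REPAIRS objects, alternating between the two stopped OSDs,
+ # so each OSD ends up missing half of them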
+ for i in $(seq 1 $REPAIRS)
+ do
+ # Remove from both osd.0 and osd.1
+ OSD=$(expr $i % 2)
+ _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
+ done
+ run_osd_bluestore $dir $primary $ceph_osd_args || return 1
+ run_osd_bluestore $dir $other $ceph_osd_args || return 1
+ wait_for_clean || return 1
+
+ repair $pgid
+ wait_for_clean || return 1
+ ceph pg dump pgs
+
+ # This should have caused $REPAIRS objects to be repaired
+ ceph pg $pgid query | jq '.info.stats.stat_sum'
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "$REPAIRS" || return 1
+
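+ # Each OSD lost half of the removed objects, so each should report REPAIRS/2 shard repairs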
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary )"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired")
+ test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other )"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired")
+ test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "$REPAIRS" || return 1
+
+ # Tear down
+ teardown $dir || return 1
+}
+
+function TEST_repair_stats_ec() {
+ local dir=$1
+ local poolname=testpool
+ local OSDS=3
+ local OBJS=30
+ # This needs to be an even number so the removals split evenly between two OSDs
+ local REPAIRS=26
+ local allow_overwrites=false
+
+ # Launch a cluster with 5 seconds scrub interval
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0"
+ for id in $(seq 0 $(expr $OSDS - 1)) ; do
+ run_osd_bluestore $dir $id $ceph_osd_args || return 1
+ done
+
+ # Create an EC pool
+ create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1
+
+ # Put $OBJS objects
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ for i in $(seq 1 $OBJS)
+ do
+ rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+ done
+
+ # Physically remove objects from two of the shards
+ # Restarted osds get $ceph_osd_args passed
+ local other=$(get_not_primary $poolname obj1)
+ local pgid=$(get_pg $poolname obj1)
+ local primary=$(get_primary $poolname obj1)
+
+ kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+ kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
for i in $(seq 1 $REPAIRS)
do
- _objectstore_tool_nodown $dir $other obj$i remove || return 1
+ # Remove from both osd.0 and osd.1
+ OSD=$(expr $i % 2)
+ _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
done
+ run_osd_bluestore $dir $primary $ceph_osd_args || return 1
run_osd_bluestore $dir $other $ceph_osd_args || return 1
+ wait_for_clean || return 1
repair $pgid
wait_for_clean || return 1
ceph pg dump pgs
# This should have caused $REPAIRS objects to be repaired
+ ceph pg $pgid query | jq '.info.stats.stat_sum'
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
test "$COUNT" = "$REPAIRS" || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1)) ; do
+ if [ $osd = $other -o $osd = $primary ]; then
+ repair=$(expr $REPAIRS / 2)
+ else
+ repair="0"
+ fi
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd )"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd ).num_shards_repaired")
+ test "$COUNT" = "$repair" || return 1
+ done
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "$REPAIRS" || return 1
+
# Tear down
teardown $dir || return 1
}
public:
friend factory;
private:
- static constexpr int HEAD_VERSION = 3;
+ static constexpr int HEAD_VERSION = 4;
static constexpr int COMPAT_VERSION = 2;
public:
spg_t pgid;
epoch_t map_epoch = 0, min_epoch = 0;
vector<PushOp> pushes;
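+ // Set by the primary when these pushes are part of a repair, so replicas
+ // can count repaired shards in their osd_stat_t (encoded since version 4)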
+ bool is_repair = false;
private:
uint64_t cost;
} else {
min_epoch = map_epoch;
}
+ if (header.version >= 4) {
+ decode(is_repair, p);
+ } else {
+ is_repair = false;
+ }
}
void encode_payload(uint64_t features) override {
encode(pgid.shard, payload);
encode(from, payload);
encode(min_epoch, payload);
+ encode(is_repair, payload);
}
std::string_view get_type_name() const override { return "MOSDPGPush"; }
void ECBackend::handle_recovery_push(
const PushOp &op,
- RecoveryMessages *m)
+ RecoveryMessages *m,
+ bool is_repair)
{
if (get_parent()->check_failsafe_full()) {
dout(10) << __func__ << " Out of space (failsafe) processing push request." << dendl;
if ((get_parent()->pgb_is_primary())) {
ceph_assert(recovery_ops.count(op.soid));
ceph_assert(recovery_ops[op.soid].obc);
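+ // The primary knows the repair state locally; replicas rely on the
+ // is_repair flag carried by MOSDPGPush (see the else branch below)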
+ if (get_parent()->pg_is_repair())
+ get_parent()->inc_osd_stat_repaired();
get_parent()->on_local_recover(
op.soid,
op.recovery_info,
false,
&m->t);
} else {
+ // If the primary told us this is a repair, bump osd_stat_t::num_shards_repaired
+ if (is_repair)
+ get_parent()->inc_osd_stat_repaired();
get_parent()->on_local_recover(
op.soid,
op.recovery_info,
msg->pgid = spg_t(get_parent()->get_info().pgid.pgid, i->first.shard);
msg->pushes.swap(i->second);
msg->compute_cost(cct);
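+ // Mark repair pushes so the receiving OSD bumps num_shards_repaired when it applies them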
+ msg->is_repair = get_parent()->pg_is_repair();
get_parent()->send_message(
i->first.osd,
msg);
for (vector<PushOp>::const_iterator i = op->pushes.begin();
i != op->pushes.end();
++i) {
- handle_recovery_push(*i, &rm);
+ handle_recovery_push(*i, &rm, op->is_repair);
}
dispatch_recovery_messages(rm, priority);
return true;
RecoveryMessages *m);
void handle_recovery_push(
const PushOp &op,
- RecoveryMessages *m);
+ RecoveryMessages *m,
+ bool is_repair);
void handle_recovery_push_reply(
const PushReplyOp &op,
pg_shard_t from,
return osd_stat;
}
+void OSDService::inc_osd_stat_repaired()
+{
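+ // Take stat_lock to stay consistent with readers such as get_osd_stat()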
+ std::lock_guard l(stat_lock);
+ osd_stat.num_shards_repaired++;
+}
+
float OSDService::compute_adjusted_ratio(osd_stat_t new_stat, float *pratio,
uint64_t adjust_used)
{
void set_statfs(const struct store_statfs_t &stbuf,
osd_alert_list_t& alerts);
osd_stat_t set_osd_stat(vector<int>& hb_peers, int num_pgs);
+ void inc_osd_stat_repaired(void);
float compute_adjusted_ratio(osd_stat_t new_stat, float *pratio, uint64_t adjust_used = 0);
osd_stat_t get_osd_stat() {
std::lock_guard l(stat_lock);
virtual bool check_osdmap_full(const set<pg_shard_t> &missing_on) = 0;
+ virtual bool pg_is_repair() = 0;
+ virtual void inc_osd_stat_repaired() = 0;
virtual bool pg_is_remote_backfilling() = 0;
virtual void pg_add_local_num_bytes(int64_t num_bytes) = 0;
virtual void pg_sub_local_num_bytes(int64_t num_bytes) = 0;
release_object_locks(manager);
}
+ bool pg_is_repair() override {
+ return is_repair();
+ }
+ void inc_osd_stat_repaired() override {
+ osd->inc_osd_stat_repaired();
+ }
bool pg_is_remote_backfilling() override {
return is_remote_backfilling();
}
i != m->pushes.end();
++i) {
replies.push_back(PushReplyOp());
- handle_push(from, *i, &(replies.back()), &t);
+ handle_push(from, *i, &(replies.back()), &t, m->is_repair);
}
MOSDPGPushReply *reply = new MOSDPGPushReply;
if (complete) {
pi.stat.num_objects_recovered++;
// XXX: This could overcount if regular recovery is needed right after a repair
- if (get_parent()->pg_is_repair())
+ if (get_parent()->pg_is_repair()) {
pi.stat.num_objects_repaired++;
+ get_parent()->inc_osd_stat_repaired();
+ }
clear_pull_from(piter);
to_continue->push_back({hoid, pi.stat});
get_parent()->on_local_recover(
void ReplicatedBackend::handle_push(
pg_shard_t from, const PushOp &pop, PushReplyOp *response,
- ObjectStore::Transaction *t)
+ ObjectStore::Transaction *t, bool is_repair)
{
dout(10) << "handle_push "
<< pop.recovery_info
pop.omap_entries,
t);
- if (complete)
+ if (complete) {
+ if (is_repair) {
+ get_parent()->inc_osd_stat_repaired();
+ dout(20) << __func__ << " repair complete" << dendl;
+ }
get_parent()->on_local_recover(
pop.recovery_info.soid,
pop.recovery_info,
ObjectContextRef(), // ok, is replica
false,
t);
+ }
}
void ReplicatedBackend::send_pushes(int prio, map<pg_shard_t, vector<PushOp> > &pushes)
msg->map_epoch = get_osdmap_epoch();
msg->min_epoch = get_parent()->get_last_peering_reset_epoch();
msg->set_priority(prio);
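+ // As in the EC backend, flag repair pushes so handle_push() on the
+ // receiving OSD can bump num_shards_repaired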
+ msg->is_repair = get_parent()->pg_is_repair();
for (;
(j != i->second.end() &&
cost < cct->_conf->osd_max_push_cost &&
list<pull_complete_info> *to_continue,
ObjectStore::Transaction *t);
void handle_push(pg_shard_t from, const PushOp &op, PushReplyOp *response,
- ObjectStore::Transaction *t);
+ ObjectStore::Transaction *t, bool is_repair);
static void trim_pushed_data(const interval_set<uint64_t> ©_subset,
const interval_set<uint64_t> &intervals_received,
f->close_section();
f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
f->dump_int("num_snap_trimming", num_snap_trimming);
+ f->dump_int("num_shards_repaired", num_shards_repaired);
f->open_object_section("op_queue_age_hist");
op_queue_age_hist.dump(f);
f->close_section();
void osd_stat_t::encode(bufferlist &bl, uint64_t features) const
{
- ENCODE_START(10, 2, bl);
+ ENCODE_START(11, 2, bl);
//////// for compatibility ////////
int64_t kb = statfs.kb();
encode(statfs, bl);
///////////////////////////////////
encode(os_alerts, bl);
+ encode(num_shards_repaired, bl);
ENCODE_FINISH(bl);
}
{
int64_t kb, kb_used,kb_avail;
int64_t kb_used_data, kb_used_omap, kb_used_meta;
- DECODE_START_LEGACY_COMPAT_LEN(10, 2, 2, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(11, 2, 2, bl);
decode(kb, bl);
decode(kb_used, bl);
decode(kb_avail, bl);
} else {
os_alerts.clear();
}
+ if (struct_v >= 11) {
+ decode(num_shards_repaired, bl);
+ } else {
+ num_shards_repaired = 0;
+ }
DECODE_FINISH(bl);
}
o.back()->hb_peers.push_back(7);
o.back()->snap_trim_queue_len = 8;
o.back()->num_snap_trimming = 99;
+ o.back()->num_shards_repaired = 101;
o.back()->os_alerts[0].emplace(
"some alert", "some alert details");
o.back()->os_alerts[1].emplace(
store_statfs_t statfs;
vector<int> hb_peers;
int32_t snap_trim_queue_len, num_snap_trimming;
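+ // Number of object shards repaired on this OSD, reported in "ceph pg dump" osd_stats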
+ uint64_t num_shards_repaired;
pow2_hist_t op_queue_age_hist;
uint32_t num_pgs = 0;
- osd_stat_t() : snap_trim_queue_len(0), num_snap_trimming(0) {}
+ osd_stat_t() : snap_trim_queue_len(0), num_snap_trimming(0),
+ num_shards_repaired(0) {}
void add(const osd_stat_t& o) {
statfs.add(o.statfs);
snap_trim_queue_len += o.snap_trim_queue_len;
num_snap_trimming += o.num_snap_trimming;
+ num_shards_repaired += o.num_shards_repaired;
op_queue_age_hist.add(o.op_queue_age_hist);
os_perf_stat.add(o.os_perf_stat);
num_pgs += o.num_pgs;
statfs.sub(o.statfs);
snap_trim_queue_len -= o.snap_trim_queue_len;
num_snap_trimming -= o.num_snap_trimming;
+ num_shards_repaired -= o.num_shards_repaired;
op_queue_age_hist.sub(o.op_queue_age_hist);
os_perf_stat.sub(o.os_perf_stat);
num_pgs -= o.num_pgs;
return l.statfs == r.statfs &&
l.snap_trim_queue_len == r.snap_trim_queue_len &&
l.num_snap_trimming == r.num_snap_trimming &&
+ l.num_shards_repaired == r.num_shards_repaired &&
l.hb_peers == r.hb_peers &&
l.op_queue_age_hist == r.op_queue_age_hist &&
l.os_perf_stat == r.os_perf_stat &&