local poolname=pool-rep
local objname=obj-$inject-$$
+ local pgid=$(get_pg $poolname $objname)
+
rados_put $dir $poolname $objname || return 1
inject_$inject rep data $poolname $objname $dir 0 || return 1
rados_get $dir $poolname $objname || return 1
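+ # The read succeeds despite the injected error and repairs the bad copy, so the counter becomes 1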
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "1" || return 1
+
inject_$inject rep data $poolname $objname $dir 0 || return 1
inject_$inject rep data $poolname $objname $dir 1 || return 1
rados_get $dir $poolname $objname || return 1
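+ # A second read repair brings the cumulative repaired count to 2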
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "2" || return 1
+
inject_$inject rep data $poolname $objname $dir 0 || return 1
inject_$inject rep data $poolname $objname $dir 1 || return 1
inject_$inject rep data $poolname $objname $dir 2 || return 1
rados_get $dir $poolname $objname hang || return 1
+
+ # With all copies bad the get hangs, no further repair can happen, and the count stays the same
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "2" || return 1
}
function TEST_rados_get_with_eio() {
diff $dir/ORIGINAL $dir/COPY || return 1
grep scrub_finish $dir/osd.${primary}.log
+ # This should have caused 1 object to be repaired
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "1" || return 1
+
# Tear down
teardown $dir || return 1
}
teardown $dir || return 1
}
+function TEST_repair_stats() {
+ local dir=$1
+ local poolname=testpool
+ local OBJS=30
+ local REPAIRS=20
+
+ # Launch a cluster with scrub interval/deep-scrub randomization disabled
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0"
+ for id in $(seq 0 2) ; do
+ run_osd_bluestore $dir $id $ceph_osd_args || return 1
+ done
+
+ create_pool $poolname 1 1 || return 1
+ ceph osd pool set $poolname size 2
+ wait_for_clean || return 1
+
+ # Write $OBJS objects
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ for i in $(seq 1 $OBJS)
+ do
+ rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+ done
+
+ # Physically remove $REPAIRS objects from one non-primary shard
+ # The restarted OSD is started with $ceph_osd_args again
+ local other=$(get_not_primary $poolname obj1)
+ local pgid=$(get_pg $poolname obj1)
+ local primary=$(get_primary $poolname obj1)
+
+ kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+ for i in $(seq 1 $REPAIRS)
+ do
+ _objectstore_tool_nodown $dir $other obj$i remove || return 1
+ done
+ run_osd_bluestore $dir $other $ceph_osd_args || return 1
+
+ repair $pgid
+ wait_for_clean || return 1
+ ceph pg dump pgs
+
+ # This should have caused $REPAIRS objects to be repaired
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "$REPAIRS" || return 1
+
+ # Tear down
+ teardown $dir || return 1
+}
+
function corrupt_and_repair_jerasure() {
local dir=$1
local allow_overwrites=$2
stat.num_bytes_recovered = op.recovery_info.size;
stat.num_keys_recovered = 0; // ??? op ... omap_entries.size(); ?
stat.num_objects_recovered = 1;
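+ // If the PG is repairing, the recovered object also counts as repaired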
+ if (get_parent()->pg_is_repair())
+ stat.num_objects_repaired = 1;
get_parent()->on_global_recover(op.hoid, stat, false);
dout(10) << __func__ << ": WRITING return " << op << dendl;
recovery_ops.erase(op.hoid);
if (is_scrubbing()) {
return false;
}
+ // An interrupted recovery repair could leave this set.
+ state_clear(PG_STATE_REPAIR);
scrubber.priority = scrubber.must_scrub ?
cct->_conf->osd_requested_scrub_priority : get_scrub_priority();
scrubber.must_scrub = false;
void PG::_finish_recovery(Context *c)
{
lock();
+ // A repair-initiated recovery leaves PG_STATE_REPAIR set; clear it now that recovery has finished
+ state_clear(PG_STATE_REPAIR);
if (deleting) {
unlock();
return;
end >= scrubber.start);
}
-void PG::scrub_clear_state()
+void PG::scrub_clear_state(bool has_error)
{
ceph_assert(is_locked());
state_clear(PG_STATE_SCRUBBING);
- state_clear(PG_STATE_REPAIR);
+ if (!has_error)
+ state_clear(PG_STATE_REPAIR);
state_clear(PG_STATE_DEEP_SCRUB);
publish_stats_to_osd();
DoRecovery())));
}
- scrub_clear_state();
+ scrub_clear_state(has_error);
scrub_unreserve_replicas();
if (is_active() && is_primary()) {
{
context< RecoveryMachine >().log_enter(state_name);
PG *pg = context< RecoveryMachine >().pg;
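+ // Clear any REPAIR flag left behind by an interrupted repair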
+ pg->state_clear(PG_STATE_REPAIR);
pg->publish_stats_to_osd();
}
bool scrub_process_inconsistent();
bool ops_blocked_by_scrub() const;
void scrub_finish();
- void scrub_clear_state();
+ void scrub_clear_state(bool has_error = false);
void _scan_snaps(ScrubMap &map);
void _repair_oinfo_oid(ScrubMap &map);
void _scan_rollback_obs(const vector<ghobject_t> &rollback_obs);
}
bool is_recovering() const { return state_test(PG_STATE_RECOVERING); }
bool is_premerge() const { return state_test(PG_STATE_PREMERGE); }
+ bool is_repair() const { return state_test(PG_STATE_REPAIR); }
bool is_empty() const { return info.last_update == eversion_t(0,0); }
const hobject_t &hoid) = 0;
virtual bool pg_is_undersized() const = 0;
+ virtual bool pg_is_repair() const = 0;
virtual void log_operation(
const vector<pg_log_entry_t> &logv,
if (!object_missing) {
object_stat_sum_t stat_diff;
stat_diff.num_objects_recovered = 1;
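+ // scrub_after_recovery means a repair kicked off this recovery, so count the object as repaired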
+ if (scrub_after_recovery)
+ stat_diff.num_objects_repaired = 1;
on_global_recover(soid, stat_diff, true);
} else {
auto recovery_handle = pgbackend->open_recovery_op();
if (!eio_errors_to_process) {
eio_errors_to_process = true;
ceph_assert(is_clean());
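+ // Mark the PG as repairing so the recovery triggered below is counted as a repair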
+ state_set(PG_STATE_REPAIR);
queue_peering_event(
PGPeeringEventRef(
std::make_shared<PGPeeringEvent>(
return is_undersized();
}
+ bool pg_is_repair() const override {
+ return is_repair();
+ }
+
void update_peer_last_complete_ondisk(
pg_shard_t fromosd,
eversion_t lcod) override {
if (complete) {
pi.stat.num_objects_recovered++;
+ // XXX: This could overcount if regular recovery is needed right after a repair
+ if (get_parent()->pg_is_repair())
+ pi.stat.num_objects_repaired++;
clear_pull_from(piter);
to_continue->push_back({hoid, pi.stat});
get_parent()->on_local_recover(
if (new_progress.is_complete(recovery_info)) {
new_progress.data_complete = true;
- if (stat)
+ if (stat) {
stat->num_objects_recovered++;
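+ // Objects pushed while the PG is repairing also count as repaired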
+ if (get_parent()->pg_is_repair())
+ stat->num_objects_repaired++;
+ }
}
if (stat) {
f->dump_int("num_objects_manifest", num_objects_manifest);
f->dump_int("num_omap_bytes", num_omap_bytes);
f->dump_int("num_omap_keys", num_omap_keys);
+ f->dump_int("num_objects_repaired", num_objects_repaired);
}
void object_stat_sum_t::encode(bufferlist& bl) const
{
- ENCODE_START(19, 14, bl);
+ ENCODE_START(20, 14, bl);
#if defined(CEPH_LITTLE_ENDIAN)
bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
#else
encode(num_objects_manifest, bl);
encode(num_omap_bytes, bl);
encode(num_omap_keys, bl);
+ encode(num_objects_repaired, bl);
#endif
ENCODE_FINISH(bl);
}
void object_stat_sum_t::decode(bufferlist::const_iterator& bl)
{
bool decode_finish = false;
- DECODE_START(19, bl); // make sure to also update fast decode below
+ DECODE_START(20, bl); // make sure to also update fast decode below
#if defined(CEPH_LITTLE_ENDIAN)
- if (struct_v >= 19) { // this must match newest decode version
+ if (struct_v >= 20) { // this must match newest decode version
bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
decode(num_omap_bytes, bl);
decode(num_omap_keys, bl);
}
+ if (struct_v >= 20) {
+ decode(num_objects_repaired, bl);
+ }
}
DECODE_FINISH(bl);
}
a.num_objects_manifest = 2;
a.num_omap_bytes = 20000;
a.num_omap_keys = 200;
+ a.num_objects_repaired = 300;
o.push_back(new object_stat_sum_t(a));
}
num_objects_manifest += o.num_objects_manifest;
num_omap_bytes += o.num_omap_bytes;
num_omap_keys += o.num_omap_keys;
+ num_objects_repaired += o.num_objects_repaired;
}
void object_stat_sum_t::sub(const object_stat_sum_t& o)
num_objects_manifest -= o.num_objects_manifest;
num_omap_bytes -= o.num_omap_bytes;
num_omap_keys -= o.num_omap_keys;
+ num_objects_repaired -= o.num_objects_repaired;
}
bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
l.num_large_omap_objects == r.num_large_omap_objects &&
l.num_objects_manifest == r.num_objects_manifest &&
l.num_omap_bytes == r.num_omap_bytes &&
- l.num_omap_keys == r.num_omap_keys;
+ l.num_omap_keys == r.num_omap_keys &&
+ l.num_objects_repaired == r.num_objects_repaired;
}
// -- object_stat_collection_t --
int64_t num_objects_manifest = 0;
int64_t num_omap_bytes = 0;
int64_t num_omap_keys = 0;
+ int64_t num_objects_repaired = 0;
object_stat_sum_t()
: num_bytes(0),
FLOOR(num_evict_mode_full);
FLOOR(num_objects_pinned);
FLOOR(num_legacy_snapsets);
+ FLOOR(num_objects_repaired);
#undef FLOOR
}
SPLIT(num_objects_manifest);
SPLIT(num_omap_bytes);
SPLIT(num_omap_keys);
+ SPLIT(num_objects_repaired);
SPLIT_PRESERVE_NONZERO(num_shallow_scrub_errors);
SPLIT_PRESERVE_NONZERO(num_deep_scrub_errors);
for (unsigned i = 0; i < out.size(); ++i) {
sizeof(num_objects_manifest) +
sizeof(num_omap_bytes) +
sizeof(num_omap_keys) +
+ sizeof(num_objects_repaired) +
sizeof(num_objects_recovered) +
sizeof(num_bytes_recovered) +
sizeof(num_keys_recovered) +