COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
test "$COUNT" = "1" || return 1
+ local object_osds=($(get_osds $poolname $objname))
+ local primary=${object_osds[0]}
+ local bad_peer=${object_osds[1]}
inject_$inject rep data $poolname $objname $dir 0 || return 1
inject_$inject rep data $poolname $objname $dir 1 || return 1
+ # Force primary to pull from the bad peer, so we can repair it too!
+ set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1
+ rados_get $dir $poolname $objname || return 1
+
+ # Wait until automatic repair of bad peer is done
+ wait_for_clean || return 1
+
+ inject_$inject rep data $poolname $objname $dir 0 || return 1
+ inject_$inject rep data $poolname $objname $dir 2 || return 1
rados_get $dir $poolname $objname || return 1
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
- test "$COUNT" = "2" || return 1
+ test "$COUNT" = "3" || return 1
flush_pg_stats
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
- test "$COUNT" = "2" || return 1
+ test "$COUNT" = "4" || return 1
inject_$inject rep data $poolname $objname $dir 0 || return 1
inject_$inject rep data $poolname $objname $dir 1 || return 1
# After the hang, another repair couldn't happen, so the counts stay the same
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
- test "$COUNT" = "2" || return 1
+ test "$COUNT" = "3" || return 1
flush_pg_stats
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
- test "$COUNT" = "2" || return 1
+ test "$COUNT" = "4" || return 1
}
function TEST_rados_get_with_eio() {
// Only use clone_overlap for recovery if there are fewer than
// osd_recover_clone_overlap_limit entries in the overlap set
OPTION(osd_recover_clone_overlap_limit, OPT_INT)
+OPTION(osd_debug_feed_pullee, OPT_INT)
OPTION(osd_backfill_scan_min, OPT_INT)
OPTION(osd_backfill_scan_max, OPT_INT)
.set_default(10)
.set_description(""),
+ Option("osd_debug_feed_pullee", Option::TYPE_INT, Option::LEVEL_DEV)
+ .set_default(-1)
+ .set_description("Feed a pullee, and force primary to pull "
+ "a currently missing object from it"),
+
Option("osd_backfill_scan_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(64)
.set_description(""),
ceph_assert(!q->second.empty());
// pick a pullee
- auto p = q->second.begin();
- std::advance(p,
- util::generate_random_number<int>(0,
- q->second.size() - 1));
+ auto p = q->second.end();
+ if (cct->_conf->osd_debug_feed_pullee >= 0) {
+ for (auto it = q->second.begin(); it != q->second.end(); it++) {
+ if (it->osd == cct->_conf->osd_debug_feed_pullee) {
+ p = it;
+ break;
+ }
+ }
+ }
+ if (p == q->second.end()) {
+ // no pullee was forced, or the requested OSD is not a candidate: pick one at random
+ p = q->second.begin();
+ std::advance(p,
+ util::generate_random_number<int>(0,
+ q->second.size() - 1));
+ }
ceph_assert(get_osdmap()->is_up(p->osd));
pg_shard_t fromshard = *p;