}
# Test recovery when the first k copies aren't all available
-function TEST_ec_recovery_errors() {
+function TEST_ec_single_recovery_error() {
local dir=$1
local objname=myobject
# Cluster should recover this object
wait_for_clean || return 1
+ rados_get $dir $poolname myobject || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery when repeated reads are needed due to EIO
+function TEST_ec_recovery_multiple_errors() {
+ local dir=$1
+ local objname=myobject
+
+ setup_osds 9 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 4 4 || return 1
+
+ rados_put $dir $poolname $objname || return 1
+ inject_eio ec data $poolname $objname $dir 0 || return 1
+ # The first read tries shards 0,1,2; after shard 0 returns EIO, shard 3
+ # is tried as well. Fail shards 3 and 4 too so the read path must cope
+ # with multiple EIOs before finding enough good shards to decode.
+ inject_eio ec data $poolname $objname $dir 3 || return 1
+ inject_eio ec data $poolname $objname $dir 4 || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ # Kill the last OSD of the acting set and mark it down/out so the
+ # cluster must recover the object from the remaining (faulty) shards.
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ # Cluster should recover this object
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname $objname || return 1
+
delete_erasure_coded_pool $poolname
}
void ECBackend::get_all_avail_shards(
const hobject_t &hoid,
+ const set<pg_shard_t> &error_shards,
set<int> &have,
map<shard_id_t, pg_shard_t> &shards,
bool for_recovery)
++i) {
dout(10) << __func__ << ": checking acting " << *i << dendl;
const pg_missing_t &missing = get_parent()->get_shard_missing(*i);
+ if (error_shards.find(*i) != error_shards.end())
+ continue;
if (!missing.is_missing(hoid)) {
assert(!have.count(i->shard));
have.insert(i->shard);
get_parent()->get_backfill_shards().begin();
i != get_parent()->get_backfill_shards().end();
++i) {
+ if (error_shards.find(*i) != error_shards.end())
+ continue;
if (have.count(i->shard)) {
assert(shards.count(i->shard));
continue;
if (m) {
assert(!(*m).is_missing(hoid));
}
+ if (error_shards.find(*i) != error_shards.end())
+ continue;
have.insert(i->shard);
shards.insert(make_pair(i->shard, *i));
}
set<int> have;
map<shard_id_t, pg_shard_t> shards;
+ set<pg_shard_t> error_shards;
- get_all_avail_shards(hoid, have, shards, for_recovery);
+ get_all_avail_shards(hoid, error_shards, have, shards, for_recovery);
set<int> need;
int r = ec_impl->minimum_to_decode(want, have, &need);
int ECBackend::get_remaining_shards(
const hobject_t &hoid,
const set<int> &avail,
+ const set<int> &want,
+ const read_result_t &result,
set<pg_shard_t> *to_read,
bool for_recovery)
{
set<int> have;
map<shard_id_t, pg_shard_t> shards;
+ // Shards that already returned an error for this read; they are excluded
+ // from the availability calculation so a failed shard is never re-read.
+ set<pg_shard_t> error_shards;
+ for (auto &p : result.errors) {
+ error_shards.insert(p.first);
+ }
+
+ get_all_avail_shards(hoid, error_shards, have, shards, for_recovery);
+
+ // Verify the surviving shards are still sufficient to decode the wanted
+ // chunks; if not, the read cannot succeed and EIO is returned.
+ set<int> need;
+ int r = ec_impl->minimum_to_decode(want, have, &need);
+ if (r < 0) {
+ dout(0) << __func__ << " not enough shards left to try for " << hoid
+ << " read result was " << result << dendl;
+ return -EIO;
+ }
- get_all_avail_shards(hoid, have, shards, for_recovery);
+ // shards_left = shards required for decode that have not been read yet.
+ set<int> shards_left;
+ for (auto p : need) {
+ if (avail.find(p) == avail.end()) {
+ shards_left.insert(p);
+ }
+ }
- for (set<int>::iterator i = have.begin();
- i != have.end();
+ for (set<int>::iterator i = shards_left.begin();
+ i != shards_left.end();
++i) {
assert(shards.count(shard_id_t(*i)));
- if (avail.find(*i) == avail.end())
- to_read->insert(shards[shard_id_t(*i)]);
+ assert(avail.find(*i) == avail.end());
+ to_read->insert(shards[shard_id_t(*i)]);
}
return 0;
}
already_read.insert(i->shard);
dout(10) << __func__ << " have/error shards=" << already_read << dendl;
set<pg_shard_t> shards;
- int r = get_remaining_shards(hoid, already_read, &shards, rop.for_recovery);
+ int r = get_remaining_shards(hoid, already_read, rop.want_to_read[hoid],
+ rop.complete[hoid], &shards, rop.for_recovery);
if (r)
return r;
- if (shards.empty())
- return -EIO;
-
- dout(10) << __func__ << " Read remaining shards " << shards << dendl;
- // TODOSAM: this doesn't seem right
list<boost::tuple<uint64_t, uint64_t, uint32_t> > offsets =
rop.to_read.find(hoid)->second.to_read;
GenContext<pair<RecoveryMessages *, read_result_t& > &> *c =