CephContext *cct;
OSDriver osdriver;
SnapMapper snap_mapper;
+ bool eio_errors_to_process = false;  // Set to true when rep_repair_primary_object() queues DoRecovery for a read error; set back to false on the "no recovery" / "recovery done" completion paths.
virtual PGBackend *get_pgbackend() = 0;
public:
list<OpRequestRef> waiting_for_scrub;
list<OpRequestRef> waiting_for_cache_not_full;
+ list<OpRequestRef> waiting_for_clean_to_primary_repair;  // Ops queued by block_for_clean(): a primary read error was hit while the PG was not clean. NOTE(review): the code that drains this list is not visible here.
map<hobject_t, list<OpRequestRef>> waiting_for_unreadable_object,
waiting_for_degraded_object,
waiting_for_blocked_object;
struct Recovered : boost::statechart::state< Recovered, Active >, NamedState {
typedef boost::mpl::list<
boost::statechart::transition< GoClean, Clean >,
+ boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
boost::statechart::custom_reaction< AllReplicasActivated >
> reactions;
explicit Recovered(my_context ctx);
op->mark_delayed("waiting for cache not full");
}
+void PrimaryLogPG::block_for_clean(  // Park `op` on waiting_for_clean_to_primary_repair and mark it delayed; called by rep_repair_primary_object() when the PG is not clean.
+ const hobject_t& oid, OpRequestRef op)  // oid: object whose read failed (used only in the log line below); op: the client request being parked.
+{
+ dout(20) << __func__ << ": blocking object " << oid
+ << " on primary repair" << dendl;
+ waiting_for_clean_to_primary_repair.push_back(op);  // NOTE(review): presumably requeued once the PG is clean again -- the requeue site is not visible in this chunk.
+ op->mark_delayed("waiting for clean to repair");  // Hand the op the reason string for the delay.
+}
+
void PrimaryLogPG::block_write_on_snap_rollback(
const hobject_t& oid, ObjectContextRef obc, OpRequestRef op)
{
} else {
int r = pgbackend->objects_read_sync(
soid, op.extent.offset, op.extent.length, op.flags, &osd_op.outdata);
+ if (r == -EIO) {
+ r = rep_repair_primary_object(soid, ctx->op);
+ }
if (r >= 0)
op.extent.length = r;
else {
bufferlist t;
uint64_t len = miter->first - last;
r = pgbackend->objects_read_sync(soid, last, len, op.flags, &t);
+ if (r == -EIO) {
+ r = rep_repair_primary_object(soid, ctx->op);
+ }
if (r < 0) {
osd->clog->error() << coll << " " << soid
<< " sparse-read failed to read: "
RequestBackfill())));
} else {
dout(10) << "activate all replicas clean, no recovery" << dendl;
+ eio_errors_to_process = false;
queue_peering_event(
CephPeeringEvtRef(
std::make_shared<CephPeeringEvt>(
RequestBackfill())));
} else {
dout(10) << "recovery done, no backfill" << dendl;
+ eio_errors_to_process = false;
queue_peering_event(
CephPeeringEvtRef(
std::make_shared<CephPeeringEvt>(
} else { // backfilling
state_clear(PG_STATE_BACKFILL);
dout(10) << "recovery done, backfill done" << dendl;
+ eio_errors_to_process = false;
queue_peering_event(
CephPeeringEvtRef(
std::make_shared<CephPeeringEvt>(
return osd->check_osdmap_full(missing_on);
}
+int PrimaryLogPG::rep_repair_primary_object(const hobject_t& soid, OpRequestRef op)  // Repair path for a read error on the primary's copy of soid: record soid as missing, list the other acting shards as sources, park op, and queue DoRecovery.
+{  // Returns -EAGAIN when op was parked (PG not clean, or waiting for the object), -EIO when no other shard exists or object_info_t cannot be decoded, or the negative result of objects_get_attr().
+ // Only supports replicated pools
+ assert(!pool.info.require_rollback());
+ assert(is_primary());  // Must only be called on the primary.
+
+ // Collect every shard in actingset except ourselves (pg_whoami)
+ list<pg_shard_t> op_shards;
+ for (auto&& i : actingset) {
+ if (i == pg_whoami) continue; // Exclude self (primary)
+ op_shards.push_back(i);
+ }
+ if (op_shards.empty()) {  // Nobody else to recover from: report the read error to the caller.
+ dout(0) << __func__ << " No other replicas available for " << soid << dendl;
+ return -EIO;
+ }
+
+ dout(10) << __func__ << " " << soid
+ << " peers osd.{" << op_shards << "}" << dendl;
+
+ if (!is_clean()) {  // PG not clean: queue the op via block_for_clean() and return without touching the missing sets.
+ block_for_clean(soid, op);
+ return -EAGAIN;
+ }
+
+ assert(!pg_log.get_missing().is_missing(soid));  // Precondition: soid is not already in the local missing set.
+ bufferlist bv;
+ int r = get_pgbackend()->objects_get_attr(soid, OI_ATTR, &bv);  // Fetch the OI_ATTR attribute; it is decoded below to obtain oi.version.
+ if (r < 0)
+ return r;
+ object_info_t oi;
+ try {
+ bufferlist::iterator bliter = bv.begin();
+ ::decode(oi, bliter);
+ } catch (...) {
+ dout(0) << __func__ << ": bad object_info_t: " << soid << dendl;
+ // XXX: without a decodable object_info_t the version to recover is unknown, so the object can't be repaired
+ return -EIO;
+ }
+
+ pg_log.missing_add(soid, oi.version, eversion_t());  // Register soid in the log's missing set with oi.version and a default-constructed eversion_t.
+
+ pg_log.set_last_requested(0);  // NOTE(review): presumably rewinds the recovery request cursor so this object is considered -- confirm against PGLog.
+
+ missing_loc.add_missing(soid, oi.version, eversion_t());  // Mirror the missing entry in missing_loc ...
+ for (auto &&i : op_shards)
+ missing_loc.add_location(soid, i);  // ... and record each non-primary acting shard as a location for soid.
+
+ // Restart the op after object becomes readable again
+ waiting_for_unreadable_object[soid].push_back(op);
+ op->mark_delayed("waiting for missing object");
+
+ if (!eio_errors_to_process) {  // Only the first error seen while the flag is false queues DoRecovery.
+ eio_errors_to_process = true;
+ assert(is_clean());  // Re-checks the condition already tested by the !is_clean() early return above.
+ queue_peering_event(
+ CephPeeringEvtRef(
+ std::make_shared<CephPeeringEvt>(
+ get_osdmap()->get_epoch(),
+ get_osdmap()->get_epoch(),
+ DoRecovery())));
+ } else {
+ // A prior error must have already cleared clean state and queued recovery
+ // or a map change has triggered re-peering.
+ // Recovery is deliberately not kicked inline here via maybe_kick_recovery(soid);
+ dout(5) << __func__<< ": Read error on " << soid << ", but already seen errors" << dendl;
+ }
+
+ return -EAGAIN;  // op is parked on waiting_for_unreadable_object[soid].
+}
+
/*---SnapTrimmer Logging---*/
#undef dout_prefix
#define dout_prefix *_dout << pg->gen_prefix()
&new_progress,
pop,
&(pi.stat), cache_dont_need);
+ // XXX: What can we do here?
assert(r == 0);
pi.recovery_progress = new_progress;
}
p != out_op->data_included.end();
++p) {
bufferlist bit;
- store->read(ch, ghobject_t(recovery_info.soid),
+ int r = store->read(ch, ghobject_t(recovery_info.soid),
p.get_start(), p.get_len(), bit,
cache_dont_need ? CEPH_OSD_OP_FLAG_FADVISE_DONTNEED: 0);
+ if (r < 0)
+ return r;
if (p.get_len() != bit.length()) {
dout(10) << " extent " << p.get_start() << "~" << p.get_len()
<< " is actually " << p.get_start() << "~" << bit.length()
pi->recovery_info,
pi->recovery_progress, &new_progress, reply,
&(pi->stat));
+ // XXX: What can we do here?
assert(r == 0);
pi->recovery_progress = new_progress;
return true;