delete _h;
}
-void ECBackend::recover_object(
+int ECBackend::recover_object(
const hobject_t &hoid,
eversion_t v,
ObjectContextRef head,
}
}
dout(10) << __func__ << ": built op " << h->ops.back() << dendl;
+ return 0;
}
bool ECBackend::can_handle_while_inactive(
int priority
) override;
- void recover_object(
+ int recover_object(
const hobject_t &hoid,
eversion_t v,
ObjectContextRef head,
* @param missing [in] set of info, missing pairs for queried nodes
* @param overlaps [in] mapping of object to file offset overlaps
*/
- virtual void recover_object(
+ virtual int recover_object(
const hobject_t &hoid, ///< [in] object to recover
eversion_t v, ///< [in] version to recover
ObjectContextRef head, ///< [in] context of the head/snapdir object
start_recovery_op(soid);
assert(!recovering.count(soid));
recovering.insert(make_pair(soid, obc));
- pgbackend->recover_object(
+ int r = pgbackend->recover_object(
soid,
v,
head_obc,
obc,
h);
+ // This is only a pull which shouldn't return an error
+ assert(r >= 0);
return PULL_YES;
}
return started;
}
+bool PrimaryLogPG::primary_error( // Handles the primary's own copy of soid (version v) being missing:
+ const hobject_t& soid, eversion_t v) // marks it missing locally and redirects missing_loc to candidate peers.
+{ // Returns true if no other shard in actingbackfill can supply the object, false otherwise.
+ pg_log.missing_add(soid, v, eversion_t()); // record soid at version v in the PG log's missing set
+ pg_log.set_last_requested(0); // NOTE(review): presumably makes recovery rescan from the start — confirm
+ missing_loc.remove_location(soid, pg_whoami); // this OSD is no longer a valid source for soid
+ bool uhoh = true; // stays true until at least one peer is found that may hold the object
+ assert(!actingbackfill.empty());
+ for (set<pg_shard_t>::iterator i = actingbackfill.begin();
+ i != actingbackfill.end();
+ ++i) {
+ if (*i == get_primary()) continue; // skip the primary itself; its copy is the one in question
+ pg_shard_t peer = *i;
+ if (!peer_missing[peer].is_missing(soid, v)) { // peer is not recorded as missing soid at v
+ missing_loc.add_location(soid, peer); // so register it as a location to recover from
+ dout(10) << info.pgid << " unexpectedly missing " << soid << " v" << v
+ << ", there should be a copy on shard " << peer << dendl;
+ uhoh = false;
+ }
+ }
+ if (uhoh) // report the outcome to the cluster log in both cases
+ osd->clog->error() << info.pgid << " missing primary copy of " << soid << ", unfound";
+ else
+ osd->clog->error() << info.pgid << " missing primary copy of " << soid
+ << ", will try copies on " << missing_loc.get_locations(soid);
+ return uhoh; // true == no candidate peer was found
+}
+
int PrimaryLogPG::prep_object_replica_pushes(
const hobject_t& soid, eversion_t v,
PGBackend::RecoveryHandle *h)
// NOTE: we know we will get a valid oloc off of disk here.
ObjectContextRef obc = get_object_context(soid, false);
if (!obc) {
- pg_log.missing_add(soid, v, eversion_t());
- missing_loc.remove_location(soid, pg_whoami);
- bool uhoh = true;
- assert(!actingbackfill.empty());
- for (set<pg_shard_t>::iterator i = actingbackfill.begin();
- i != actingbackfill.end();
- ++i) {
- if (*i == get_primary()) continue;
- pg_shard_t peer = *i;
- if (!peer_missing[peer].is_missing(soid, v)) {
- missing_loc.add_location(soid, peer);
- dout(10) << info.pgid << " unexpectedly missing " << soid << " v" << v
- << ", there should be a copy on shard " << peer << dendl;
- uhoh = false;
- }
- }
- if (uhoh)
- osd->clog->error() << info.pgid << " missing primary copy of " << soid << ", unfound";
- else
- osd->clog->error() << info.pgid << " missing primary copy of " << soid
- << ", will try copies on " << missing_loc.get_locations(soid);
+ primary_error(soid, v);
return 0;
}
* In almost all cases, therefore, this lock should be uncontended.
*/
obc->ondisk_read_lock();
- pgbackend->recover_object(
+ int r = pgbackend->recover_object(
soid,
v,
ObjectContextRef(),
obc, // has snapset context
h);
obc->ondisk_read_unlock();
+ if (r < 0) {
+ dout(0) << __func__ << " Error " << r << " on oid " << soid << dendl;
+ list<pg_shard_t> fl = { pg_whoami };
+ failed_push(fl, soid);
+ primary_error(soid, v);
+ return 0;
+ }
return 1;
}
return ops;
}
-void PrimaryLogPG::prep_backfill_object_push(
+int PrimaryLogPG::prep_backfill_object_push(
hobject_t oid, eversion_t v,
ObjectContextRef obc,
vector<pg_shard_t> peers,
// We need to take the read_lock here in order to flush in-progress writes
obc->ondisk_read_lock();
- pgbackend->recover_object(
+ int r = pgbackend->recover_object(
oid,
v,
ObjectContextRef(),
obc,
h);
obc->ondisk_read_unlock();
+ return r;
}
void PrimaryLogPG::update_range(
assert(!pool.info.require_rollback());
assert(is_primary());
- // Get non-primary shards
- list<pg_shard_t> op_shards;
- for (auto&& i : actingset) {
- if (i == pg_whoami) continue; // Exclude self (primary)
- op_shards.push_back(i);
- }
- if (op_shards.empty()) {
- dout(0) << __func__ << " No other replicas available for " << soid << dendl;
- return -EIO;
- }
-
dout(10) << __func__ << " " << soid
- << " peers osd.{" << op_shards << "}" << dendl;
+ << " peers osd.{" << actingbackfill << "}" << dendl;
if (!is_clean()) {
block_for_clean(soid, op);
return -EIO;
}
- pg_log.missing_add(soid, oi.version, eversion_t());
-
- pg_log.set_last_requested(0);
-
missing_loc.add_missing(soid, oi.version, eversion_t());
- for (auto &&i : op_shards)
- missing_loc.add_location(soid, i);
+ if (primary_error(soid, oi.version)) {
+ dout(0) << __func__ << " No other replicas available for " << soid << dendl;
+ // XXX: If we knew that there is no down osd which could include this
+ // object, it would be nice if we could return EIO here.
+ // If a "never fail" flag was available, that could be used
+ // for rbd to NOT return EIO until object marked lost.
+
+ // Drop through to save this op in case an osd comes up with the object.
+ }
// Restart the op after object becomes readable again
waiting_for_unreadable_object[soid].push_back(op);
const hobject_t &oid,
const object_stat_sum_t &stat_diff) override;
void failed_push(const list<pg_shard_t> &from, const hobject_t &soid) override;
+ bool primary_error(const hobject_t& soid, eversion_t v) override;
void cancel_pull(const hobject_t &soid) override;
void apply_stats(
const hobject_t &soid,
hobject_t last_backfill_started;
bool new_backfill;
+ /// @return 1 if recovery of soid was started, 0 otherwise
int prep_object_replica_pushes(const hobject_t& soid, eversion_t v,
PGBackend::RecoveryHandle *h);
ThreadPool::TPHandle &handle ///< [in] tp handle
);
- void prep_backfill_object_push(
+ int prep_backfill_object_push(
hobject_t oid, eversion_t v, ObjectContextRef obc,
vector<pg_shard_t> peers,
PGBackend::RecoveryHandle *h);
delete h;
}
-void ReplicatedBackend::recover_object(
+int ReplicatedBackend::recover_object(
const hobject_t &hoid,
eversion_t v,
ObjectContextRef head,
hoid,
head,
h);
- return;
} else {
assert(obc);
int started = start_pushes(
hoid,
obc,
h);
- assert(started > 0);
+ if (started < 0) {
+ pushing[hoid].clear();
+ return started;
+ }
}
+ return 0;
}
void ReplicatedBackend::check_recovery_sources(const OSDMapRef& osdmap)
assert(j != bc->pulling.end());
ObjectContextRef obc = j->second.obc;
bc->clear_pull(j, false /* already did it */);
- if (!bc->start_pushes(i.hoid, obc, h)) {
+ int started = bc->start_pushes(i.hoid, obc, h);
+ // XXX: Handle errors here?
+ assert(started >= 0);
+ if (!started) {
bc->get_parent()->on_global_recover(
i.hoid, i.stat);
}
* intelligently push an object to a replica. make use of existing
* clones/heads and dup data ranges where possible.
*/
-void ReplicatedBackend::prep_push_to_replica(
+int ReplicatedBackend::prep_push_to_replica(
ObjectContextRef obc, const hobject_t& soid, pg_shard_t peer,
PushOp *pop, bool cache_dont_need)
{
lock_manager);
}
- prep_push(
+ return prep_push(
obc,
soid,
peer,
std::move(lock_manager));
}
-void ReplicatedBackend::prep_push(ObjectContextRef obc,
+int ReplicatedBackend::prep_push(ObjectContextRef obc,
const hobject_t& soid, pg_shard_t peer,
PushOp *pop, bool cache_dont_need)
{
data_subset.insert(0, obc->obs.oi.size);
map<hobject_t, interval_set<uint64_t>> clone_subsets;
- prep_push(obc, soid, peer,
+ return prep_push(obc, soid, peer,
obc->obs.oi.version, data_subset, clone_subsets,
pop, cache_dont_need, ObcLockManager());
}
-void ReplicatedBackend::prep_push(
+int ReplicatedBackend::prep_push(
ObjectContextRef obc,
const hobject_t& soid, pg_shard_t peer,
eversion_t version,
&new_progress,
pop,
&(pi.stat), cache_dont_need);
- // XXX: What can we do here?
- assert(r == 0);
+ if (r < 0)
+ return r;
pi.recovery_progress = new_progress;
+ return 0;
}
void ReplicatedBackend::submit_push_data(
int r = store->read(ch, ghobject_t(recovery_info.soid),
p.get_start(), p.get_len(), bit,
cache_dont_need ? CEPH_OSD_OP_FLAG_FADVISE_DONTNEED: 0);
- if (r < 0)
+ if (r < 0) {
return r;
+ }
if (p.get_len() != bit.length()) {
dout(10) << " extent " << p.get_start() << "~" << p.get_len()
<< " is actually " << p.get_start() << "~" << bit.length()
if (j->second.is_missing(soid)) {
++pushes;
h->pushes[peer].push_back(PushOp());
- prep_push_to_replica(obc, soid, peer,
+ int r = prep_push_to_replica(obc, soid, peer,
&(h->pushes[peer].back()), h->cache_dont_need);
+ if (r < 0) {
+ // prep_push_to_replica() should fail on first attempt or not at all
+ assert(pushes == 1);
+ h->pushes[peer].pop_back();
+ return r;
+ }
}
}
return pushes;
int priority) override;
/// @see PGBackend::recover_object
- void recover_object(
+ int recover_object(
const hobject_t &hoid,
eversion_t v,
ObjectContextRef head,
const hobject_t &soid,
ObjectContextRef obj,
RPGHandle *h);
- void prep_push_to_replica(
+ int prep_push_to_replica(
ObjectContextRef obc, const hobject_t& soid, pg_shard_t peer,
PushOp *pop, bool cache_dont_need = true);
- void prep_push(
+ int prep_push(
ObjectContextRef obc,
const hobject_t& oid, pg_shard_t dest,
PushOp *op,
bool cache_dont_need);
- void prep_push(
+ int prep_push(
ObjectContextRef obc,
const hobject_t& soid, pg_shard_t peer,
eversion_t version,