#endif
PG::RecoveryCtx rctx = create_context();
- int started = pg->start_recovery_ops(max, &rctx, handle);
+
+ int started;
+ bool more = pg->start_recovery_ops(max, &rctx, handle, &started);
dout(10) << "do_recovery started " << started << "/" << max << " on " << *pg << dendl;
/*
* It may be that our initial locations were bad and we errored
* out while trying to pull.
*/
- if (!started && pg->have_unfound()) {
+ if (!more && pg->have_unfound()) {
pg->discover_all_missing(*rctx.query_map);
if (rctx.query_map->empty()) {
dout(10) << "do_recovery no luck, giving up on this pg for now" << dendl;
virtual void check_local() = 0;
- virtual int start_recovery_ops(
+ /**
+ * Kick off recovery work for this PG and report both how many ops were
+ * started and whether any useful work was done. The two are reported
+ * separately: the count goes out through ops_begun, the progress flag
+ * is the return value.
+ *
+ * @param max presumably the upper bound on recovery ops to start in this
+ * call (the caller logs "started/max") -- TODO confirm
+ * @param prctx recovery context supplied by the caller
+ * @param handle thread-pool handle supplied by the caller
+ * @param ops_begun returns how many recovery ops the function started;
+ * NOTE(review): the ReplicatedPG implementation dereferences this
+ * unconditionally, so callers should pass a valid pointer
+ * @returns true if any useful work was accomplished; false otherwise.
+ * do_recovery() treats false, combined with have_unfound(), as the
+ * cue to call discover_all_missing().
+ */
+ virtual bool start_recovery_ops(
int max, RecoveryCtx *prctx,
- ThreadPool::TPHandle &handle) = 0;
+ ThreadPool::TPHandle &handle,
+ int *ops_begun) = 0;
void purge_strays();
}
-int ReplicatedPG::start_recovery_ops(
+bool ReplicatedPG::start_recovery_ops(
int max, RecoveryCtx *prctx,
- ThreadPool::TPHandle &handle)
+ ThreadPool::TPHandle &handle,
+ int *ops_started)
{
- int started = 0;
+ int& started = *ops_started;
+ started = 0;
+ bool work_in_progress = false;
assert(is_primary());
if (!state_test(PG_STATE_RECOVERING) &&
!state_test(PG_STATE_BACKFILL)) {
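+ /* TODO: I think this case is broken: returning false tells
+ * do_recovery() that no useful work was done, so if the PG has
+ * unfound objects it will go on to call discover_all_missing()
+ * even though all that happened is that we were queued twice. */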
dout(10) << "recovery raced and were queued twice, ignoring!" << dendl;
- return 0;
+ return false;
}
const pg_missing_t &missing = pg_log.get_missing();
started = recover_replicas(max, handle);
}
+ if (started)
+ work_in_progress = true;
+
bool deferred_backfill = false;
if (recovering.empty() &&
state_test(PG_STATE_BACKFILL) &&
}
deferred_backfill = true;
} else {
- started += recover_backfill(max - started, handle);
+ started += recover_backfill(max - started, handle, &work_in_progress);
}
}
osd->logger->inc(l_osd_rop, started);
if (!recovering.empty() ||
- started || recovery_ops_active > 0 || deferred_backfill)
- return started;
+ work_in_progress || recovery_ops_active > 0 || deferred_backfill)
+ return work_in_progress;
assert(recovering.empty());
assert(recovery_ops_active == 0);
int unfound = get_num_unfound();
if (unfound) {
dout(10) << " still have " << unfound << " unfound" << dendl;
- return started;
+ return work_in_progress;
}
if (missing.num_missing() > 0) {
// this shouldn't happen!
osd->clog.error() << info.pgid << " recovery ending with " << missing.num_missing()
<< ": " << missing.missing << "\n";
- return started;
+ return work_in_progress;
}
if (needs_recovery()) {
// this shouldn't happen!
// We already checked num_missing() so we must have missing replicas
osd->clog.error() << info.pgid << " recovery ending with missing replicas\n";
- return started;
+ return work_in_progress;
}
if (state_test(PG_STATE_RECOVERING)) {
*/
int ReplicatedPG::recover_backfill(
int max,
- ThreadPool::TPHandle &handle)
+ ThreadPool::TPHandle &handle, bool *work_started)
{
dout(10) << "recover_backfill (" << max << ")" << dendl;
assert(backfill_target >= 0);
dout(10) << " peer num_objects now " << pinfo.stats.stats.sum.num_objects
<< " / " << info.stats.stats.sum.num_objects << dendl;
+ if (ops)
+ *work_started = true;
return ops;
}
void _clear_recovery_state();
void queue_for_recovery();
- int start_recovery_ops(
+ bool start_recovery_ops(
int max, RecoveryCtx *prctx,
- ThreadPool::TPHandle &handle);
+ ThreadPool::TPHandle &handle, int *started);
int recover_primary(int max, ThreadPool::TPHandle &handle);
int recover_replicas(int max, ThreadPool::TPHandle &handle);
- int recover_backfill(int max, ThreadPool::TPHandle &handle);
+ /**
+ * @param max presumably the remaining budget of ops to start;
+ * start_recovery_ops() passes (max - started) -- TODO confirm
+ * @param handle thread-pool handle supplied by the caller
+ * @param work_started will be set to true if recover_backfill got anywhere.
+ * NOTE(review): the visible part of the implementation only ever sets
+ * this to true (when the op count is non-zero) and never clears it, so
+ * the caller must initialize it to false before the call.
+ * @returns the number of operations started
+ */
+ int recover_backfill(int max, ThreadPool::TPHandle &handle,
+ bool *work_started);
/**
* scan a (hash) range of objects in the current pg