From 1118b44361f184ad7ea4c391962da8814b231bb6 Mon Sep 17 00:00:00 2001 From: xie xingguo Date: Fri, 20 Apr 2018 18:29:26 +0800 Subject: [PATCH] osd/PG: prefer async_recovery_targets in reverse order of cost Theoretically peers which have a longer list of objects to recover shall equivalently take a longer time to recover and hence have a bigger chance to block client ops. Also, to minimize the risk of data loss, we want to bring those broken (inconsistent) peers back to normal as soon as possible. Putting them into the async_recovery_targets queue, however, did quite the opposite. Signed-off-by: xie xingguo --- src/osd/PG.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 3d6abdff24e..de461de11b2 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1568,11 +1568,12 @@ void PG::choose_async_recovery_replicated(const map &all_ dout(20) << __func__ << " candidates by cost are: " << candidates_by_cost << dendl; // take out as many osds as we can for async recovery, in order of cost - for (auto weighted_shard : candidates_by_cost) { + for (auto rit = candidates_by_cost.rbegin(); + rit != candidates_by_cost.rend(); ++rit) { if (want->size() <= pool.info.min_size) { break; } - pg_shard_t cur_shard = weighted_shard.second; + pg_shard_t cur_shard = rit->second; vector candidate_want(*want); for (auto it = candidate_want.begin(); it != candidate_want.end(); ++it) { if (*it == cur_shard.osd) { -- 2.39.5