From 4e78d8055be38a8884a3832c5148d4b018b97a5b Mon Sep 17 00:00:00 2001 From: Neha Ojha Date: Thu, 14 Dec 2017 10:40:37 -0800 Subject: [PATCH] PG: cleanup and fixes for async recovery Signed-off-by: Neha Ojha --- src/osd/PG.cc | 23 ++++++++++++++--------- src/osd/PG.h | 2 +- src/osd/PrimaryLogPG.cc | 8 ++++++++ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 3aadda0640953..337063ddebbfd 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1478,14 +1478,13 @@ bool PG::recoverable_and_ge_min_size(const vector &want) const pool.info.is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD)); } } - // We go incomplete if below min_size for ec_pools since backfill // does not currently maintain rollbackability // Otherwise, we will go "peered", but not "active" if (num_want_acting < pool.info.min_size && (pool.info.is_erasure() || !cct->_conf->osd_allow_recovery_below_min_size)) { - dout(10) << __func__ << "failed, below min size" << dendl; + dout(10) << __func__ << " failed, below min size" << dendl; return false; } @@ -1527,7 +1526,7 @@ void PG::choose_async_recovery_ec(const map &all_info, } } - dout(20) << __func__ << "candidates by cost are: " << candidates_by_cost + dout(20) << __func__ << " candidates by cost are: " << candidates_by_cost << dendl; // take out as many osds as we can for async recovery, in order of cost @@ -1540,7 +1539,7 @@ void PG::choose_async_recovery_ec(const map &all_info, async_recovery->insert(cur_shard); } } - dout(20) << __func__ << "result want=" << *want + dout(20) << __func__ << " result want=" << *want << " async_recovery=" << *async_recovery << dendl; } @@ -1568,7 +1567,7 @@ void PG::choose_async_recovery_replicated(const map &all_ } } - dout(20) << __func__ << "candidates by cost are: " << candidates_by_cost + dout(20) << __func__ << " candidates by cost are: " << candidates_by_cost << dendl; // take out as many osds as we can for async recovery, in order of cost @@ -1578,6 +1577,7 @@ void PG::choose_async_recovery_replicated(const map &all_ for (auto it = candidate_want.begin(); it != candidate_want.end(); ++it) { if (*it == cur_shard.osd) { candidate_want.erase(it); + want->swap(candidate_want); async_recovery->insert(cur_shard); break; } @@ -1586,7 +1586,7 @@ void PG::choose_async_recovery_replicated(const map &all_ break; } } - dout(20) << __func__ << "result want=" << *want + dout(20) << __func__ << " result want=" << *want << " async_recovery=" << *async_recovery << dendl; } @@ -1683,7 +1683,6 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id, choose_async_recovery_replicated(all_info, auth_log_shard->second, &want, &want_async_recovery); } } - if (want != acting) { dout(10) << __func__ << " want " << want << " != acting " << acting << ", requesting pg_temp change" << dendl; @@ -1707,6 +1706,10 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id, // Caller is GetInfo backfill_targets = want_backfill; } + assert(async_recovery_targets.empty() || async_recovery_targets == want_async_recovery); + if (async_recovery_targets.empty()) { + async_recovery_targets = want_async_recovery; + } // Will not change if already set because up would have had to change // Verify that nothing in backfill is in stray_set for (set::iterator i = want_backfill.begin(); @@ -1714,8 +1717,9 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id, ++i) { assert(stray_set.find(*i) == stray_set.end()); } - dout(10) << __func__ << " want " << want << " (== acting) backfill_targets " - << want_backfill << dendl; + dout(10) << "choose_acting want=" << want << " backfill_targets=" + << want_backfill << " async_recovery_targets=" + << async_recovery_targets << dendl; return true; } @@ -2733,6 +2737,7 @@ void PG::clear_recovery_state() finish_recovery_op(soid, true); } + async_recovery_targets.clear(); backfill_targets.clear(); backfill_info.clear(); peer_backfill_info.clear(); diff --git a/src/osd/PG.h b/src/osd/PG.h index ed4907daaa139..b07f2fc070926 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1167,7 +1167,7 @@ protected: bool backfill_reserved; bool backfill_reserving; - set backfill_targets; + set backfill_targets, async_recovery_targets; bool is_backfill_targets(pg_shard_t osd) { return backfill_targets.count(osd); diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index c9811373be4aa..2fa81d49b65c8 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -940,6 +940,14 @@ int PrimaryLogPG::do_command( f->dump_stream("shard") << *p; f->close_section(); } + if (!async_recovery_targets.empty()) { + f->open_array_section("async_recovery_targets"); + for (set::iterator p = async_recovery_targets.begin(); + p != async_recovery_targets.end(); + ++p) + f->dump_stream("shard") << *p; + f->close_section(); + } if (!acting_recovery_backfill.empty()) { f->open_array_section("acting_recovery_backfill"); for (set::iterator p = acting_recovery_backfill.begin(); -- 2.39.5