From 812bef0b2e1cae5731bb8650651513b128d824ca Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 5 Apr 2019 18:51:14 -0700 Subject: [PATCH] osd/: refactor to avoid mutable peer_missing refs in PG Signed-off-by: sjust@redhat.com --- src/osd/PG.cc | 11 ++--- src/osd/PG.h | 1 - src/osd/PeeringState.cc | 75 ++++++++++++++++++++++++++++ src/osd/PeeringState.h | 34 +++++++++++++ src/osd/PrimaryLogPG.cc | 105 ++++++++++------------------------------ src/osd/PrimaryLogPG.h | 12 +++-- 6 files changed, 145 insertions(+), 93 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 1a4370d186a..a2f616e663d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -192,7 +192,6 @@ PG::PG(OSDService *o, OSDMapRef curmap, last_complete_ondisk(recovery_state.last_complete_ondisk), last_update_applied(recovery_state.last_update_applied), peer_info(recovery_state.peer_info), - peer_missing(recovery_state.peer_missing), peer_last_complete_ondisk(recovery_state.peer_last_complete_ondisk), min_last_complete_ondisk(recovery_state.min_last_complete_ondisk), pg_trim_to(recovery_state.pg_trim_to), @@ -2410,17 +2409,13 @@ void PG::repair_object( dout(0) << __func__ << ": Need version of replica, bad object_info_t: " << soid << dendl; ceph_abort(); } - if (bad_peer != get_primary()) { - peer_missing[bad_peer].add(soid, oi.version, eversion_t(), false); - } else { + + if (bad_peer == get_primary()) { // We should only be scrubbing if the PG is clean. ceph_assert(waiting_for_unreadable_object.empty()); - - pg_log.missing_add(soid, oi.version, eversion_t()); - - pg_log.set_last_requested(0); dout(10) << __func__ << ": primary = " << get_primary() << dendl; } + recovery_state.force_object_missing(bad_peer, soid, oi.version); if (is_ec_pg() || bad_peer == get_primary()) { // we'd better collect all shard for EC pg, and prepare good peers as the diff --git a/src/osd/PG.h b/src/osd/PG.h index 2c3458f1e23..f38ba46ad04 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -189,7 +189,6 @@ protected: eversion_t &last_complete_ondisk; eversion_t &last_update_applied; map &peer_info; - map &peer_missing; map &peer_last_complete_ondisk; eversion_t &min_last_complete_ondisk; eversion_t &pg_trim_to; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 76bb8311012..4f2fac332f5 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -3668,6 +3668,81 @@ void PeeringState::adjust_purged_snaps( dirty_big_info = true; } +void PeeringState::on_peer_recover( + pg_shard_t peer, + const hobject_t &soid, + const eversion_t &version) +{ + pl->publish_stats_to_osd(); + // done! + peer_missing[peer].got(soid, version); + missing_loc.add_location(soid, peer); +} + +void PeeringState::begin_peer_recover( + pg_shard_t peer, + const hobject_t soid) +{ + peer_missing[peer].revise_have(soid, eversion_t()); +} + +void PeeringState::force_object_missing( + pg_shard_t peer, + const hobject_t &soid, + eversion_t version) +{ + if (peer != primary) { + peer_missing[peer].add(soid, version, eversion_t(), false); + } else { + pg_log.missing_add(soid, version, eversion_t()); + pg_log.set_last_requested(0); + } +} + +void PeeringState::pre_submit_op( + const hobject_t &hoid, + const vector& logv, + eversion_t at_version) +{ + if (at_version > eversion_t()) { + for (auto &&i : get_acting_recovery_backfill()) { + if (i == primary) continue; + pg_info_t &pinfo = peer_info[i]; + // keep peer_info up to date + if (pinfo.last_complete == pinfo.last_update) + pinfo.last_complete = at_version; + pinfo.last_update = at_version; + } + } + + bool requires_missing_loc = false; + for (auto &&i : get_async_recovery_targets()) { + if (i == primary || !get_peer_missing(i).is_missing(hoid)) + continue; + requires_missing_loc = true; + for (auto &&entry: logv) { + peer_missing[i].add_next_event(entry); + } + } + + if (requires_missing_loc) { + for (auto &&entry: logv) { + psdout(30) << __func__ << " missing_loc before: " + << missing_loc.get_locations(entry.soid) << dendl; + missing_loc.add_missing(entry.soid, entry.version, + eversion_t(), entry.is_delete()); + // clear out missing_loc + missing_loc.clear_location(entry.soid); + for (auto &i: get_actingset()) { + if (!get_peer_missing(i).is_missing(entry.soid)) + missing_loc.add_location(entry.soid, i); + } + psdout(30) << __func__ << " missing_loc after: " + << missing_loc.get_locations(entry.soid) << dendl; + } + } +} + /*------------ Peering State Machine----------------*/ #undef dout_prefix #define dout_prefix (context< PeeringMachine >().dpp->gen_prefix(*_dout) \ diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 1fb8e5f98d5..8af0d6ed90a 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -1525,17 +1525,36 @@ public: bool transaction_applied, bool async); + void pre_submit_op( + const hobject_t &hoid, + const vector& logv, + eversion_t at_version); + void recover_got( const hobject_t &oid, eversion_t v, bool is_delete, ObjectStore::Transaction &t); + void on_peer_recover( + pg_shard_t peer, + const hobject_t &soid, + const eversion_t &version); + + void begin_peer_recover( + pg_shard_t peer, + const hobject_t soid); + void update_backfill_progress( const hobject_t &updated_backfill, const pg_stat_t &updated_stats, bool preserve_local_num_bytes, ObjectStore::Transaction &t); + void force_object_missing( + pg_shard_t peer, + const hobject_t &oid, + eversion_t version); + void dump_history(Formatter *f) const { state_history.dump(f); } @@ -1740,6 +1759,21 @@ public: return info; } + const decltype(peer_info) &get_peer_info() const { + return peer_info; + } + const decltype(peer_missing) &get_peer_missing() const { + return peer_missing; + } + const pg_missing_const_i &get_peer_missing(const pg_shard_t &peer) const { + if (peer == pg_whoami) { + return pg_log.get_missing(); + } else { + assert(peer_missing.count(peer)); + return peer_missing.find(peer)->second; + } + } + bool needs_recovery() const; bool needs_backfill() const; bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index cde245b1337..84e99eb9a40 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -481,24 +481,6 @@ void PrimaryLogPG::on_global_recover( finish_degraded_object(soid); } -void PrimaryLogPG::on_peer_recover( - pg_shard_t peer, - const hobject_t &soid, - const ObjectRecoveryInfo &recovery_info) -{ - publish_stats_to_osd(); - // done! - peer_missing[peer].got(soid, recovery_info.version); - missing_loc.add_location(soid, peer); -} - -void PrimaryLogPG::begin_peer_recover( - pg_shard_t peer, - const hobject_t soid) -{ - peer_missing[peer].revise_have(soid, eversion_t()); -} - void PrimaryLogPG::schedule_recovery_work( GenContext *c) { @@ -561,7 +543,8 @@ bool PrimaryLogPG::should_send_op( << peer_info[peer].last_backfill << ")" << dendl; return should_send; } - if (is_async_recovery_target(peer) && peer_missing[peer].is_missing(hoid)) { + if (is_async_recovery_target(peer) && + recovery_state.get_peer_missing(peer).is_missing(hoid)) { should_send = false; dout(10) << __func__ << " issue_repop shipping empty opt to osd." << peer << ", object " << hoid @@ -643,10 +626,10 @@ bool PrimaryLogPG::is_degraded_or_backfilling_object(const hobject_t& soid) ++i) { if (*i == get_primary()) continue; pg_shard_t peer = *i; - auto peer_missing_entry = peer_missing.find(peer); + auto peer_missing_entry = recovery_state.get_peer_missing().find(peer); // If an object is missing on an async_recovery_target, return false. // This will not block the op and the object is async recovered later. - if (peer_missing_entry != peer_missing.end() && + if (peer_missing_entry != recovery_state.get_peer_missing().end() && peer_missing_entry->second.get_items().count(soid)) { if (is_async_recovery_target(peer)) continue; @@ -667,8 +650,8 @@ bool PrimaryLogPG::is_degraded_or_backfilling_object(const hobject_t& soid) bool PrimaryLogPG::is_degraded_on_async_recovery_target(const hobject_t& soid) { for (auto &i: get_async_recovery_targets()) { - auto peer_missing_entry = peer_missing.find(i); - if (peer_missing_entry != peer_missing.end() && + auto peer_missing_entry = recovery_state.get_peer_missing().find(i); + if (peer_missing_entry != recovery_state.get_peer_missing().end() && peer_missing_entry->second.get_items().count(soid)) { dout(30) << __func__ << " " << soid << dendl; return true; @@ -782,10 +765,10 @@ void PrimaryLogPG::maybe_force_recovery() ++it) { if (*it == get_primary()) continue; pg_shard_t peer = *it; - auto it_missing = peer_missing.find(peer); - if (it_missing != peer_missing.end() && + auto it_missing = recovery_state.get_peer_missing().find(peer); + if (it_missing != recovery_state.get_peer_missing().end() && !it_missing->second.get_rmissing().empty()) { - const auto& min_obj = peer_missing[peer].get_rmissing().begin(); + const auto& min_obj = recovery_state.get_peer_missing(peer).get_rmissing().begin(); dout(20) << __func__ << " peer " << peer << " min_version " << min_obj->first << " oid " << min_obj->second << dendl; if (min_version > min_obj->first) { @@ -10450,18 +10433,6 @@ void PrimaryLogPG::issue_repop(RepGather *repop, OpContext *ctx) << dendl; repop->v = ctx->at_version; - if (ctx->at_version > eversion_t()) { - for (set::iterator i = get_acting_recovery_backfill().begin(); - i != get_acting_recovery_backfill().end(); - ++i) { - if (*i == get_primary()) continue; - pg_info_t &pinfo = peer_info[*i]; - // keep peer_info up to date - if (pinfo.last_complete == pinfo.last_update) - pinfo.last_complete = ctx->at_version; - pinfo.last_update = ctx->at_version; - } - } ctx->op_t->add_obc(ctx->obc); if (ctx->clone_obc) { @@ -10480,34 +10451,10 @@ void PrimaryLogPG::issue_repop(RepGather *repop, OpContext *ctx) projected_log.add(entry); } - bool requires_missing_loc = false; - for (set::iterator i = get_async_recovery_targets().begin(); - i != get_async_recovery_targets().end(); - ++i) { - if (*i == get_primary() || !peer_missing[*i].is_missing(soid)) continue; - requires_missing_loc = true; - for (auto &&entry: ctx->log) { - peer_missing[*i].add_next_event(entry); - } - } - - if (requires_missing_loc) { - for (auto &&entry: ctx->log) { - dout(30) << __func__ << " missing_loc before: " - << missing_loc.get_locations(entry.soid) << dendl; - missing_loc.add_missing(entry.soid, entry.version, - eversion_t(), entry.is_delete()); - // clear out missing_loc - missing_loc.clear_location(entry.soid); - for (auto &i: get_actingset()) { - if (!peer_missing[i].is_missing(entry.soid)) - missing_loc.add_location(entry.soid, i); - } - dout(30) << __func__ << " missing_loc after: " - << missing_loc.get_locations(entry.soid) << dendl; - } - } - + recovery_state.pre_submit_op( + soid, + ctx->log, + ctx->at_version); pgbackend->submit_transaction( soid, ctx->delta_stats, @@ -10657,7 +10604,7 @@ void PrimaryLogPG::submit_log_entries( ++i) { pg_shard_t peer(*i); if (peer == pg_whoami) continue; - ceph_assert(peer_missing.count(peer)); + ceph_assert(recovery_state.get_peer_missing().count(peer)); ceph_assert(peer_info.count(peer)); if (get_osdmap()->require_osd_release >= CEPH_RELEASE_JEWEL) { ceph_assert(repop); @@ -11417,7 +11364,7 @@ int PrimaryLogPG::recover_missing( for (const auto& shard : get_acting_recovery_backfill()) { if (shard == pg_whoami) continue; - if (peer_missing[shard].is_missing(soid)) { + if (recovery_state.get_peer_missing(shard).is_missing(soid)) { dout(20) << __func__ << ": soid " << soid << " needs to be deleted from replica " << shard << dendl; object_missing = true; break; @@ -11651,10 +11598,10 @@ eversion_t PrimaryLogPG::pick_newest_available(const hobject_t& oid) ++i) { if (*i == get_primary()) continue; pg_shard_t peer = *i; - if (!peer_missing[peer].is_missing(oid)) { + if (!recovery_state.get_peer_missing(peer).is_missing(oid)) { continue; } - eversion_t h = peer_missing[peer].get_items().at(oid).have; + eversion_t h = recovery_state.get_peer_missing(peer).get_items().at(oid).have; dout(10) << "pick_newest_available " << oid << " " << h << " on osd." << peer << dendl; if (h > v) v = h; @@ -12590,8 +12537,8 @@ uint64_t PrimaryLogPG::recover_primary(uint64_t max, ThreadPool::TPHandle &handl eversion_t alternate_need = latest->reverting_to; dout(10) << " need to pull prior_version " << alternate_need << " for revert " << item << dendl; - for (map::iterator p = peer_missing.begin(); - p != peer_missing.end(); + for (auto p = recovery_state.get_peer_missing().begin(); + p != recovery_state.get_peer_missing().end(); ++p) if (p->second.is_missing(soid, need) && p->second.get_items().at(soid).have == alternate_need) { @@ -12651,7 +12598,7 @@ bool PrimaryLogPG::primary_error( ++i) { if (*i == get_primary()) continue; pg_shard_t peer = *i; - if (!peer_missing[peer].is_missing(soid, v)) { + if (!recovery_state.get_peer_missing(peer).is_missing(soid, v)) { missing_loc.add_location(soid, peer); dout(10) << info.pgid << " unexpectedly missing " << soid << " v" << v << ", there should be a copy on shard " << peer << dendl; @@ -12766,8 +12713,8 @@ uint64_t PrimaryLogPG::recover_replicas(uint64_t max, ThreadPool::TPHandle &hand if (p == get_primary()) { continue; } - auto pm = peer_missing.find(p); - ceph_assert(pm != peer_missing.end()); + auto pm = recovery_state.get_peer_missing().find(p); + ceph_assert(pm != recovery_state.get_peer_missing().end()); auto nm = pm->second.num_missing(); if (nm != 0) { if (is_async_recovery_target(p)) { @@ -12791,8 +12738,8 @@ uint64_t PrimaryLogPG::recover_replicas(uint64_t max, ThreadPool::TPHandle &hand for (auto &replica: replicas_by_num_missing) { pg_shard_t &peer = replica.second; ceph_assert(peer != get_primary()); - map::const_iterator pm = peer_missing.find(peer); - ceph_assert(pm != peer_missing.end()); + auto pm = recovery_state.get_peer_missing().find(peer); + ceph_assert(pm != recovery_state.get_peer_missing().end()); map::const_iterator pi = peer_info.find(peer); ceph_assert(pi != peer_info.end()); size_t m_sz = pm->second.num_missing(); @@ -13287,9 +13234,7 @@ int PrimaryLogPG::prep_backfill_object_push( backfills_in_flight.insert(oid); for (unsigned int i = 0 ; i < peers.size(); ++i) { - map::iterator bpm = peer_missing.find(peers[i]); - ceph_assert(bpm != peer_missing.end()); - bpm->second.add(oid, eversion_t(), eversion_t(), false); + recovery_state.force_object_missing(peers[i], oid, eversion_t()); } ceph_assert(!recovering.count(oid)); diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 96fa050e525..7953989902e 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -282,10 +282,14 @@ public: pg_shard_t peer, const hobject_t &oid, const ObjectRecoveryInfo &recovery_info - ) override; + ) override { + recovery_state.on_peer_recover(peer, oid, recovery_info.version); + } void begin_peer_recover( pg_shard_t peer, - const hobject_t oid) override; + const hobject_t oid) override { + recovery_state.begin_peer_recover(peer, oid); + } void on_global_recover( const hobject_t &oid, const object_stat_sum_t &stat_diff, @@ -351,11 +355,11 @@ public: return missing_loc.get_missing_locs(); } const map &get_shard_missing() const override { - return peer_missing; + return recovery_state.get_peer_missing(); } using PGBackend::Listener::get_shard_missing; const map &get_shard_info() const override { - return peer_info; + return recovery_state.get_peer_info(); } using PGBackend::Listener::get_shard_info; const pg_missing_tracker_t &get_local_missing() const override { -- 2.47.3