From 4e14a380becd61d823a1c86e2dbb41dc8e3a5834 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 7 Jun 2018 08:33:46 -0500 Subject: [PATCH] osd/PG: normalize query processing in Stray and ReplicaActive A stray PG may end up in ReplicaActive if it is participating in backfill. However, whether it is or isn't, we should treat queries the same. Otherwise we end up with weird behaviors like: - osd's stray pg moves to ReplicaActive (gets info+log from primary) - osd goes down and back up - primary restarts peering, request FULLLOG to find missing objects - osd ignores FULLLOG because it is ReplicaActive and not Stray Fixes: http://tracker.ceph.com/issues/24373 Reported-by: Kouya Shimura Signed-off-by: Sage Weil --- src/osd/PG.cc | 44 ++++++++++++++++++++++++-------------------- src/osd/PG.h | 2 +- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 3df79d54698..1b013813c17 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5717,6 +5717,26 @@ void PG::fulfill_log( osd->send_message_osd_cluster(mlog, con.get()); } +void PG::fulfill_query(const MQuery& query, RecoveryCtx *rctx) +{ + if (query.query.type == pg_query_t::INFO) { + pair<pg_shard_t, pg_info_t> notify_info; + update_history(query.query.history); + fulfill_info(query.from, query.query, notify_info); + rctx->send_notify( + notify_info.first, + pg_notify_t( + notify_info.first.shard, pg_whoami.shard, + query.query_epoch, + get_osdmap()->get_epoch(), + notify_info.second), + past_intervals); + } else { + update_history(query.query.history); + fulfill_log(query.from, query.query, query.query_epoch); + } +} + void PG::check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap) { bool changed = false; @@ -8334,13 +8354,11 @@ boost::statechart::result PG::RecoveryState::ReplicaActive::react(const ActMap&) return discard_event(); } -boost::statechart::result PG::RecoveryState::ReplicaActive::react(const MQuery& query) +boost::statechart::result PG::RecoveryState::ReplicaActive::react( + const 
MQuery& query) { PG *pg = context< RecoveryMachine >().pg; - if (query.query.type == pg_query_t::MISSING) { - pg->update_history(query.query.history); - pg->fulfill_log(query.from, query.query, query.query_epoch); - } // else: from prior to activation, safe to ignore + pg->fulfill_query(query, context<RecoveryMachine>().get_recovery_ctx()); return discard_event(); } @@ -8434,21 +8452,7 @@ boost::statechart::result PG::RecoveryState::Stray::react(const MInfoRec& infoev boost::statechart::result PG::RecoveryState::Stray::react(const MQuery& query) { PG *pg = context< RecoveryMachine >().pg; - if (query.query.type == pg_query_t::INFO) { - pair<pg_shard_t, pg_info_t> notify_info; - pg->update_history(query.query.history); - pg->fulfill_info(query.from, query.query, notify_info); - context< RecoveryMachine >().send_notify( - notify_info.first, - pg_notify_t( - notify_info.first.shard, pg->pg_whoami.shard, - query.query_epoch, - pg->get_osdmap()->get_epoch(), - notify_info.second), - pg->past_intervals); - } else { - pg->fulfill_log(query.from, query.query, query.query_epoch); - } + pg->fulfill_query(query, context<RecoveryMachine>().get_recovery_ctx()); return discard_event(); } diff --git a/src/osd/PG.h b/src/osd/PG.h index c9656412b38..31cda1b7128 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -2941,7 +2941,7 @@ protected: void fulfill_info(pg_shard_t from, const pg_query_t &query, pair<pg_shard_t, pg_info_t> &notify_info); void fulfill_log(pg_shard_t from, const pg_query_t &query, epoch_t query_epoch); - + void fulfill_query(const MQuery& q, RecoveryCtx *rctx); void check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap); bool should_restart_peering( -- 2.47.3