git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PG: normalize query processing in Stray and ReplicaActive 22545/head
author Sage Weil <sage@redhat.com>
Thu, 7 Jun 2018 13:33:46 +0000 (08:33 -0500)
committer Nathan Cutler <ncutler@suse.com>
Wed, 13 Jun 2018 10:15:51 +0000 (12:15 +0200)
A stray PG may end up in ReplicaActive if it is participating in backfill.
However, whether it is or isn't, we should treat queries the same.
Otherwise we end up with weird behaviors like:

 - osd's stray pg moves to ReplicaActive (gets info+log from primary)
 - osd goes down and back up
 - primary restarts peering, requests FULLLOG to find missing objects
 - osd ignores FULLLOG because it is ReplicaActive and not Stray

Fixes: http://tracker.ceph.com/issues/24373
Reported-by: Kouya Shimura <kouya@jp.fujitsu.com>
Signed-off-by: Sage Weil <sage@redhat.com>
(cherry picked from commit 4e14a380becd61d823a1c86e2dbb41dc8e3a5834)

src/osd/PG.cc
src/osd/PG.h

index 80ae339a089ec205571a679716944094f7139de9..f8fdc05e483ef7c81760a60252416c79c0eb8982 100644 (file)
@@ -5710,6 +5710,26 @@ void PG::fulfill_log(
   osd->send_message_osd_cluster(mlog, con.get());
 }
 
+void PG::fulfill_query(const MQuery& query, RecoveryCtx *rctx)
+{
+  if (query.query.type == pg_query_t::INFO) {
+    pair<pg_shard_t, pg_info_t> notify_info;
+    update_history(query.query.history);
+    fulfill_info(query.from, query.query, notify_info);
+    rctx->send_notify(
+      notify_info.first,
+      pg_notify_t(
+       notify_info.first.shard, pg_whoami.shard,
+       query.query_epoch,
+       get_osdmap()->get_epoch(),
+       notify_info.second),
+      past_intervals);
+  } else {
+    update_history(query.query.history);
+    fulfill_log(query.from, query.query, query.query_epoch);
+  }
+}
+
 void PG::check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap)
 {
   bool changed = false;
@@ -8316,13 +8336,11 @@ boost::statechart::result PG::RecoveryState::ReplicaActive::react(const ActMap&)
   return discard_event();
 }
 
-boost::statechart::result PG::RecoveryState::ReplicaActive::react(const MQuery& query)
+boost::statechart::result PG::RecoveryState::ReplicaActive::react(
+  const MQuery& query)
 {
   PG *pg = context< RecoveryMachine >().pg;
-  if (query.query.type == pg_query_t::MISSING) {
-    pg->update_history(query.query.history);
-    pg->fulfill_log(query.from, query.query, query.query_epoch);
-  } // else: from prior to activation, safe to ignore
+  pg->fulfill_query(query, context<RecoveryMachine>().get_recovery_ctx());
   return discard_event();
 }
 
@@ -8416,21 +8434,7 @@ boost::statechart::result PG::RecoveryState::Stray::react(const MInfoRec& infoev
 boost::statechart::result PG::RecoveryState::Stray::react(const MQuery& query)
 {
   PG *pg = context< RecoveryMachine >().pg;
-  if (query.query.type == pg_query_t::INFO) {
-    pair<pg_shard_t, pg_info_t> notify_info;
-    pg->update_history(query.query.history);
-    pg->fulfill_info(query.from, query.query, notify_info);
-    context< RecoveryMachine >().send_notify(
-      notify_info.first,
-      pg_notify_t(
-       notify_info.first.shard, pg->pg_whoami.shard,
-       query.query_epoch,
-       pg->get_osdmap()->get_epoch(),
-       notify_info.second),
-      pg->past_intervals);
-  } else {
-    pg->fulfill_log(query.from, query.query, query.query_epoch);
-  }
+  pg->fulfill_query(query, context<RecoveryMachine>().get_recovery_ctx());
   return discard_event();
 }
 
index fab472b030c6aaf55663da3fe9f51a23da26d4a5..f4eb14a3eb1c09c27a61c6d924fe130085031158 100644 (file)
@@ -2939,7 +2939,7 @@ protected:
   void fulfill_info(pg_shard_t from, const pg_query_t &query,
                    pair<pg_shard_t, pg_info_t> &notify_info);
   void fulfill_log(pg_shard_t from, const pg_query_t &query, epoch_t query_epoch);
-
+  void fulfill_query(const MQuery& q, RecoveryCtx *rctx);
   void check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap);
 
   bool should_restart_peering(