crimson/osd: discard op if pg changes or sender is down

author Kefu Chai <kchai@redhat.com>

Sat, 5 Sep 2020 12:00:42 +0000 (20:00 +0800)

committer Kefu Chai <kchai@redhat.com>

Sat, 5 Sep 2020 12:37:08 +0000 (20:37 +0800)
author Kefu Chai <kchai@redhat.com>
Sat, 5 Sep 2020 12:00:42 +0000 (20:00 +0800)
committer Kefu Chai <kchai@redhat.com>
Sat, 5 Sep 2020 12:37:08 +0000 (20:37 +0800)
diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc

index c4b3f5df18af7bf3144f8d70d10c82d4cda09162..0c5c580e8f420c8456abaf03010cfaaf19e997d3 100644 (file)
--- a/src/crimson/osd/pg.cc
+++ b/src/crimson/osd/pg.cc
@@ -918,7 +918,41 @@ seastar::future<> PG::handle_rep_op(Ref<MOSDRepOp> req)
  void PG::handle_rep_op_reply(crimson::net::Connection* conn,
                              const MOSDRepOpReply& m)
  {
-  backend->got_rep_op_reply(m);
+  if (is_valid_rep_op_reply(m)) {
+    backend->got_rep_op_reply(m);
+  }
+}
+
+bool PG::is_valid_rep_op_reply(const MOSDRepOpReply& reply) const
+{
+  // if a repop is replied after a replica goes down in a new osdmap, and
+  // before the pg advances to this new osdmap, the repop replies before this
+  // repop can be discarded by that replica OSD, because the primary resets the
+  // connection to it when handling the new osdmap marking it down, and also
+  // resets the messenger session when the replica reconnects. to avoid the
+  // out-of-order replies, the messages from that replica should be discarded.
+  const auto osdmap = peering_state.get_osdmap();
+  const int from_osd = reply.get_source().num();
+  if (osdmap->is_down(from_osd)) {
+    return false;
+  }
+  // Mostly, this overlaps with the old_peering_msg
+  // condition.  An important exception is pushes
+  // sent by replicas not in the acting set, since
+  // if such a replica goes down it does not cause
+  // a new interval.
+  if (osdmap->get_down_at(from_osd) >= reply.map_epoch) {
+    return false;
+  }
+  // same pg?
+  //  if pg changes *at all*, we reset and repeer!
+  if (epoch_t lpr = peering_state.get_last_peering_reset();
+      lpr > reply.map_epoch) {
+    logger().debug("{}: pg changed {} after {}, dropping",
+                   __func__, get_info().history, reply.map_epoch);
+    return false;
+  }
+  return true;
  }
  
  seastar::future<> PG::stop()
diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h

index 1f8e0a0bf614c16b60d58af6948dffadbb5a5294..cc34fb7dfad00a347a09202484682054b36b0c75 100644 (file)
--- a/src/crimson/osd/pg.h
+++ b/src/crimson/osd/pg.h
@@ -665,6 +665,7 @@ private:
      return seastar::make_ready_future<bool>(true);
    }
  
+  bool is_valid_rep_op_reply(const MOSDRepOpReply& reply) const;
    bool is_missing_object(const hobject_t& soid) const {
      return peering_state.get_pg_log().get_missing().get_items().count(soid);
    }
author	Kefu Chai <kchai@redhat.com>
	Sat, 5 Sep 2020 12:00:42 +0000 (20:00 +0800)
committer	Kefu Chai <kchai@redhat.com>
	Sat, 5 Sep 2020 12:37:08 +0000 (20:37 +0800)
src/crimson/osd/pg.cc		patch \| blob \| history
src/crimson/osd/pg.h		patch \| blob \| history