]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: send "mark me down" messages to monitor when shutting down
authorXuehan Xu <xxhdx1985126@163.com>
Mon, 1 Jun 2020 10:50:00 +0000 (18:50 +0800)
committerXuehan Xu <xxhdx1985126@163.com>
Thu, 18 Jun 2020 01:50:50 +0000 (09:50 +0800)
Signed-off-by: Xuehan Xu <xxhdx1985126@163.com>
src/crimson/osd/osd.cc
src/crimson/osd/osd.h
src/crimson/osd/state.h

index 5a082d6c756e675851d8bdca9ed057aafcfab4c2..0bf20f0f2b3a36f5a737ef2b12b674e1427c64bf 100644 (file)
@@ -10,6 +10,7 @@
 #include <boost/smart_ptr/make_local_shared.hpp>
 #include <fmt/format.h>
 #include <fmt/ostream.h>
+#include <seastar/core/timer.hh>
 
 #include "common/pick_address.h"
 #include "include/util.h"
@@ -19,6 +20,7 @@
 #include "messages/MOSDBeacon.h"
 #include "messages/MOSDBoot.h"
 #include "messages/MOSDMap.h"
+#include "messages/MOSDMarkMeDown.h"
 #include "messages/MOSDOp.h"
 #include "messages/MOSDPGLog.h"
 #include "messages/MOSDPGPull.h"
@@ -457,42 +459,44 @@ seastar::future<> OSD::stop()
 {
   logger().info("stop");
   // see also OSD::shutdown()
-  state.set_stopping();
-
-  if (!public_msgr->dispatcher_chain_empty()) {
-    public_msgr->remove_dispatcher(*this);
-    public_msgr->remove_dispatcher(*mgrc);
-    public_msgr->remove_dispatcher(*monc);
-  }
-  if (!cluster_msgr->dispatcher_chain_empty()) {
-    cluster_msgr->remove_dispatcher(*this);
-    cluster_msgr->remove_dispatcher(*mgrc);
-    cluster_msgr->remove_dispatcher(*monc);
-  }
-  auto gate_close_fut = gate.close();
-  return asok->stop().then([this] {
-    return heartbeat->stop();
-  }).then([this] {
-    return store->umount();
-  }).then([this] {
-    return store->stop();
-  }).then([this] {
-    return seastar::parallel_for_each(pg_map.get_pgs(),
-      [](auto& p) {
-      return p.second->stop();
+  return prepare_to_stop().then([this] {
+    state.set_stopping();
+    logger().debug("prepared to stop");
+    if (!public_msgr->dispatcher_chain_empty()) {
+      public_msgr->remove_dispatcher(*this);
+      public_msgr->remove_dispatcher(*mgrc);
+      public_msgr->remove_dispatcher(*monc);
+    }
+    if (!cluster_msgr->dispatcher_chain_empty()) {
+      cluster_msgr->remove_dispatcher(*this);
+      cluster_msgr->remove_dispatcher(*mgrc);
+      cluster_msgr->remove_dispatcher(*monc);
+    }
+    auto gate_close_fut = gate.close();
+    return asok->stop().then([this] {
+      return heartbeat->stop();
+    }).then([this] {
+      return store->umount();
+    }).then([this] {
+      return store->stop();
+    }).then([this] {
+      return seastar::parallel_for_each(pg_map.get_pgs(),
+       [](auto& p) {
+       return p.second->stop();
+      });
+    }).then([this] {
+      return monc->stop();
+    }).then([this] {
+      return mgrc->stop();
+    }).then([fut=std::move(gate_close_fut)]() mutable {
+      return std::move(fut);
+    }).then([this] {
+      return when_all_succeed(
+         public_msgr->shutdown(),
+         cluster_msgr->shutdown());
+    }).handle_exception([](auto ep) {
+      logger().error("error while stopping osd: {}", ep);
     });
-  }).then([this] {
-    return monc->stop();
-  }).then([this] {
-    return mgrc->stop();
-  }).then([fut=std::move(gate_close_fut)]() mutable {
-    return std::move(fut);
-  }).then([this] {
-    return when_all_succeed(
-       public_msgr->shutdown(),
-       cluster_msgr->shutdown());
-  }).handle_exception([](auto ep) {
-    logger().error("error while stopping osd: {}", ep);
   });
 }
 
@@ -624,6 +628,8 @@ seastar::future<> OSD::ms_dispatch(crimson::net::Connection* conn, MessageRef m)
       return seastar::now();
     case MSG_COMMAND:
       return handle_command(conn, boost::static_pointer_cast<MCommand>(m));
+    case MSG_OSD_MARK_ME_DOWN:
+      return handle_mark_me_down(conn, boost::static_pointer_cast<MOSDMarkMeDown>(m));
     case MSG_OSD_PG_PULL:
       [[fallthrough]];
     case MSG_OSD_PG_PUSH:
@@ -970,6 +976,11 @@ seastar::future<> OSD::committed_osd_maps(version_t first,
         heartbeat_timer.arm_periodic(
           std::chrono::seconds(TICK_INTERVAL));
       }
+    } else if (!osdmap->is_up(whoami)) {
+      if (state.is_prestop()) {
+       got_stop_ack();
+       return seastar::now();
+      }
     }
     check_osdmap_features();
     // yay!
@@ -1036,6 +1047,15 @@ seastar::future<> OSD::handle_rep_op_reply(crimson::net::Connection* conn,
   return seastar::now();
 }
 
+seastar::future<> OSD::handle_mark_me_down(crimson::net::Connection* conn,
+                                          Ref<MOSDMarkMeDown> m)
+{
+  if (state.is_prestop()) {
+    got_stop_ack();
+  }
+  return seastar::now();
+}
+
 seastar::future<> OSD::handle_recovery_subreq(crimson::net::Connection* conn,
                                   Ref<MOSDFastDispatchOp> m)
 {
@@ -1180,4 +1200,29 @@ blocking_future<Ref<PG>> OSD::wait_for_pg(
   return pg_map.get_pg(pgid).first;
 }
 
+seastar::future<> OSD::prepare_to_stop()
+{
+  if (osdmap && osdmap->is_up(whoami)) {
+    state.set_prestop();
+    return monc->send_message(
+         make_message<MOSDMarkMeDown>(
+           monc->get_fsid(),
+           whoami,
+           osdmap->get_addrs(whoami),
+           osdmap->get_epoch(),
+           true)).then([this] {
+      const auto timeout =
+       std::chrono::duration_cast<std::chrono::milliseconds>(
+         std::chrono::duration<double>(
+           local_conf().get_val<double>("osd_mon_shutdown_timeout")));
+      return seastar::with_timeout(
+      seastar::timer<>::clock::now() + timeout,
+      stop_acked.get_future());
+    }).handle_exception_type([this](seastar::timed_out_error&) {
+      return seastar::now();
+    });
+  }
+  return seastar::now();
+}
+
 }
index 05d5ad9adfc79c0c984fbbb060924051ab92e021..cb6c0db3b0423dc683a2b1b4b718d9366365afa5 100644 (file)
@@ -181,7 +181,8 @@ private:
                                      Ref<MOSDPeeringOp> m);
   seastar::future<> handle_recovery_subreq(crimson::net::Connection* conn,
                                           Ref<MOSDFastDispatchOp> m);
-
+  seastar::future<> handle_mark_me_down(crimson::net::Connection* conn,
+                                       Ref<MOSDMarkMeDown> m);
 
   seastar::future<> committed_osd_maps(version_t first,
                                        version_t last,
@@ -205,6 +206,12 @@ public:
 private:
   PGMap pg_map;
   crimson::common::Gated gate;
+
+  seastar::promise<> stop_acked;
+  void got_stop_ack() {
+    stop_acked.set_value();
+  }
+  seastar::future<> prepare_to_stop();
 public:
   blocking_future<Ref<PG>> get_or_create_pg(
     spg_t pgid,
index 4c445348d301216962dcd5350a96868a040dd181..ba48cd36f12c86f1d7b038db5f94f20eb8b87f87 100644 (file)
@@ -15,6 +15,7 @@ class OSDState {
     PREBOOT,
     BOOTING,
     ACTIVE,
+    PRESTOP,
     STOPPING,
     WAITING_FOR_HEALTHY,
   };
@@ -34,6 +35,9 @@ public:
   bool is_active() const {
     return state == State::ACTIVE;
   }
+  bool is_prestop() const {
+    return state == State::PRESTOP;
+  }
   bool is_stopping() const {
     return state == State::STOPPING;
   }
@@ -49,6 +53,9 @@ public:
   void set_active() {
     state = State::ACTIVE;
   }
+  void set_prestop() {
+    state = State::PRESTOP;
+  }
   void set_stopping() {
     state = State::STOPPING;
   }
@@ -58,6 +65,7 @@ public:
     case State::PREBOOT: return "preboot";
     case State::BOOTING: return "booting";
     case State::ACTIVE: return "active";
+    case State::PRESTOP: return "prestop";
     case State::STOPPING: return "stopping";
     case State::WAITING_FOR_HEALTHY: return "waiting_for_healthy";
     default: return "???";