]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/osd: add minimal PG recovery FSM
authorKefu Chai <kchai@redhat.com>
Mon, 25 Feb 2019 16:26:36 +0000 (00:26 +0800)
committerKefu Chai <kchai@redhat.com>
Fri, 22 Mar 2019 05:24:15 +0000 (13:24 +0800)
after this change, PG is able to go clean as primary

Signed-off-by: Kefu Chai <kchai@redhat.com>
src/crimson/osd/CMakeLists.txt
src/crimson/osd/pg.cc
src/crimson/osd/pg.h
src/crimson/osd/recovery_events.h [new file with mode: 0644]
src/crimson/osd/recovery_machine.cc [new file with mode: 0644]
src/crimson/osd/recovery_machine.h [new file with mode: 0644]
src/crimson/osd/recovery_state.cc [new file with mode: 0644]
src/crimson/osd/recovery_state.h [new file with mode: 0644]
src/crimson/osd/recovery_states.cc [new file with mode: 0644]
src/crimson/osd/recovery_states.h [new file with mode: 0644]

index ac821dea2b483c79f5fb45d8d97d185e3889c17d..9045b61112db6802dc3c8a45defcddea5b584bdc 100644 (file)
@@ -5,6 +5,9 @@ add_executable(crimson-osd
   osd.cc
   osd_meta.cc
   pg.cc
-  pg_meta.cc)
+  pg_meta.cc
+  recovery_machine.cc
+  recovery_state.cc
+  recovery_states.cc)
 target_link_libraries(crimson-osd
   crimson-common crimson-os crimson fmt::fmt)
index 98b87e61c4fd6911ef5929382d467342b5e52f0d..684120af57902d678af792f7ec49265d8a39261c 100644 (file)
 
 #include "messages/MOSDPGInfo.h"
 #include "messages/MOSDPGLog.h"
+#include "messages/MOSDPGNotify.h"
+#include "messages/MOSDPGQuery.h"
+
 #include "osd/OSDMap.h"
 
+#include "crimson/net/Connection.h"
+#include "crimson/net/Messenger.h"
 #include "crimson/os/cyan_store.h"
 #include "crimson/osd/pg_meta.h"
 
+#include "recovery_events.h"
+#include "recovery_state.h"
+
 namespace {
   seastar::logger& logger() {
     return ceph::get_logger(ceph_subsys_osd);
   }
+  template<typename Message, typename... Args>
+  Ref<Message> make_message(Args&&... args)
+  {
+    return {new Message{std::forward<Args>(args)...}, false};
+  }
 }
 
+using recovery::AdvMap;
+using recovery::ActMap;
+using recovery::Initialize;
+
 PG::PG(spg_t pgid,
        pg_shard_t pg_shard,
        pg_pool_t&& pool,
@@ -32,6 +49,7 @@ PG::PG(spg_t pgid,
   : pgid{pgid},
     whoami{pg_shard},
     pool{std::move(pool)},
+    recovery_state{*this},
     info{pgid},
     osdmap{osdmap},
     msgr{msgr}
@@ -61,6 +79,9 @@ seastar::future<> PG::read_state(ceph::os::CyanStore* store)
       info.stats.acting = acting;
       info.stats.acting_primary = primary.osd;
       info.stats.mapping_epoch = info.history.same_interval_since;
+      recovery_state.handle_event(Initialize{});
+      // note: we don't activate here because we know the OSD will advance maps
+      // during boot.
       return seastar::now();
     });
 }
@@ -770,19 +791,60 @@ void PG::maybe_mark_clean()
 
 seastar::future<> PG::do_peering_event(std::unique_ptr<PGPeeringEvent> evt)
 {
-  // todo
-  return seastar::now();
+  return dispatch_context(recovery_state.handle_event(evt->get_event()));
+}
+
+seastar::future<> PG::dispatch_context(recovery::Context&& ctx)
+{
+  return seastar::do_with(recovery::Context{ctx}, [this](auto& todo) {
+    return seastar::when_all_succeed(
+      seastar::parallel_for_each(std::move(todo.notifies),
+        [this](auto& osd_notifies) {
+          auto& [peer, notifies] = osd_notifies;
+          auto m = make_message<MOSDPGNotify>(get_osdmap_epoch(),
+                                              std::move(notifies));
+          return send_to_osd(peer, m, get_osdmap_epoch());
+        }),
+      seastar::parallel_for_each(std::move(todo.queries),
+        [this](auto& osd_queries) {
+          auto& [peer, queries] = osd_queries;
+          auto m = make_message<MOSDPGQuery>(get_osdmap_epoch(),
+                                             std::move(queries));
+          return send_to_osd(peer, m, get_osdmap_epoch());
+        }),
+      seastar::parallel_for_each(std::move(todo.infos),
+        [this](auto& osd_infos) {
+          auto& [peer, infos] = osd_infos;
+          auto m = make_message<MOSDPGInfo>(get_osdmap_epoch(),
+                                            std::move(infos));
+          return send_to_osd(peer, m, get_osdmap_epoch());
+        })
+    );
+  });
 }
 
 seastar::future<> PG::handle_advance_map(cached_map_t next_map)
 {
-  // todo
+  auto last_map = std::move(osdmap);
+  osdmap = std::move(next_map);
+  vector<int> new_up, new_acting;
+  int up_primary, acting_primary;
+  osdmap->pg_to_up_acting_osds(pgid.pgid,
+                               &new_up,
+                               &up_primary,
+                               &new_acting,
+                               &acting_primary);
+  logger().info("handle_advance_map {}/{} -- {}/{}",
+                new_up, new_acting, up_primary, acting_primary);
+  recovery_state.handle_event(AdvMap{osdmap, last_map,
+                                     std::move(new_up), up_primary,
+                                     std::move(new_acting), acting_primary});
   return seastar::now();
 }
 
 seastar::future<> PG::handle_activate_map()
 {
-  // todo
+  recovery_state.handle_event(ActMap{});
   return seastar::now();
 }
 
@@ -804,6 +866,46 @@ std::ostream& operator<<(std::ostream& os, const PG& pg)
   return os;
 }
 
+void PG::reply_pg_query(const MQuery& query, recovery::Context* ctx)
+{
+  switch (query.query.type) {
+  case pg_query_t::INFO:
+    return reply_pg_query_for_info(query, ctx);
+  case pg_query_t::LOG:
+    return reply_pg_query_for_log(query, false);
+  case pg_query_t::FULLLOG:
+    return reply_pg_query_for_log(query, true);
+  }
+}
+
+void PG::reply_pg_query_for_info(const MQuery& query, recovery::Context* ctx)
+{
+  recovery::Context::notify_t notify{pg_notify_t{query.from.shard,
+                                                 whoami.shard,
+                                                 query.query_epoch,
+                                                 get_osdmap_epoch(),
+                                                 info},
+                                     past_intervals};
+  ctx->notifies[query.from.osd].push_back(std::move(notify));
+}
+
+void PG::reply_pg_query_for_log(const MQuery& query, bool full)
+{
+  auto m = make_message<MOSDPGLog>(query.from.shard,
+                                   whoami.shard,
+                                   get_osdmap_epoch(),
+                                   info,
+                                   query.query_epoch);
+  // todo:
+  // m->missing = pg_log.get_missing();
+  if (full) {
+    // m->log = pg_log.get_log();
+  } else {
+    // maybe partial
+  }
+  send_to_osd(query.from.osd, m, get_osdmap_epoch());
+}
+
 seastar::future<> PG::send_to_osd(int peer, Ref<Message> m, epoch_t from_epoch)
 {
   if (osdmap->is_down(peer) || osdmap->get_info(peer).up_from > from_epoch) {
index 5ab4807abeb270b91f074ed090bcda4cd555e1f1..f2e8deb8222e8ec21e2022de4e33cee26860c6a2 100644 (file)
@@ -9,10 +9,15 @@
 #include <seastar/core/future.hh>
 
 #include "osd/osd_types.h"
+#include "recovery_state.h"
 
 template<typename T> using Ref = boost::intrusive_ptr<T>;
 class OSDMap;
+class MQuery;
 class PGPeeringEvent;
+namespace recovery {
+  class Context;
+}
 
 namespace ceph::net {
   class Messenger;
@@ -99,15 +104,20 @@ public:
   void maybe_mark_clean();
 
   seastar::future<> do_peering_event(std::unique_ptr<PGPeeringEvent> evt);
+  seastar::future<> dispatch_context(recovery::Context&& ctx);
   seastar::future<> handle_advance_map(cached_map_t next_map);
   seastar::future<> handle_activate_map();
   seastar::future<> share_pg_info();
+  void reply_pg_query(const MQuery& query, recovery::Context* ctx);
 
   void print(ostream& os) const;
 
 private:
   seastar::future<> activate_peer(pg_shard_t peer);
+  void reply_pg_query_for_info(const MQuery& query, recovery::Context* ctx);
+  void reply_pg_query_for_log(const MQuery& query, bool full);
   seastar::future<> send_to_osd(int peer, Ref<Message> m, epoch_t from_epoch);
+
   void update_primary_state(const std::vector<int>& new_up,
                            int new_up_primary,
                            const std::vector<int>& new_acting,
@@ -119,6 +129,7 @@ private:
 
   epoch_t last_peering_reset = 0;
   epoch_t need_up_thru = 0;
+  recovery::State recovery_state;
 
   bool should_notify_primary = false;
 
diff --git a/src/crimson/osd/recovery_events.h b/src/crimson/osd/recovery_events.h
new file mode 100644 (file)
index 0000000..9653ad9
--- /dev/null
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <vector>
+
+#include <boost/smart_ptr/local_shared_ptr.hpp>
+#include <boost/statechart/custom_reaction.hpp>
+#include <boost/statechart/event.hpp>
+#include <boost/statechart/simple_state.hpp>
+#include <boost/statechart/state.hpp>
+#include <boost/statechart/state_machine.hpp>
+#include <boost/statechart/transition.hpp>
+#include <boost/statechart/event_base.hpp>
+
+#include "osd/PGPeeringEvent.h"
+
+namespace recovery {
+
+struct AdvMap : boost::statechart::event< AdvMap > {
+  // TODO: use foreign_ptr<> once the osdmap cache needs to be shared across
+  //       cores
+  using OSDMapRef = boost::local_shared_ptr<OSDMap>;
+  OSDMapRef osdmap;
+  OSDMapRef last_map;
+  std::vector<int> new_up, new_acting;
+  int up_primary, acting_primary;
+  AdvMap(OSDMapRef osdmap, OSDMapRef last_map,
+        const std::vector<int>& new_up, int up_primary,
+        const std::vector<int>& new_acting, int acting_primary):
+    osdmap(osdmap),
+    last_map(last_map),
+    new_up(new_up),
+    new_acting(new_acting),
+    up_primary(up_primary),
+    acting_primary(acting_primary) {}
+  void print(std::ostream *out) const {
+    *out << "AdvMap";
+  }
+};
+
+struct ActMap : boost::statechart::event< ActMap > {
+  ActMap() : boost::statechart::event< ActMap >() {}
+  void print(std::ostream *out) const {
+    *out << "ActMap";
+  }
+};
+
+struct Activate : boost::statechart::event< Activate > {
+  epoch_t activation_epoch;
+  explicit Activate(epoch_t q) : boost::statechart::event< Activate >(),
+                                activation_epoch(q) {}
+  void print(std::ostream *out) const {
+    *out << "Activate from " << activation_epoch;
+  }
+};
+
+struct Initialize : boost::statechart::event<Initialize> {};
+
+}
diff --git a/src/crimson/osd/recovery_machine.cc b/src/crimson/osd/recovery_machine.cc
new file mode 100644 (file)
index 0000000..ab82cd1
--- /dev/null
@@ -0,0 +1,26 @@
+#include "recovery_machine.h"
+#include "recovery_state.h"
+#include "pg.h"
+
+namespace recovery
+{
+void Machine::send_notify(pg_shard_t to,
+                          const pg_notify_t& info,
+                          const PastIntervals& pi)
+{
+  state.context.notifies[to.osd].emplace_back(info, pi);
+}
+
+void Machine::send_query(pg_shard_t to,
+                         const pg_query_t& query)
+{
+  spg_t pgid{pg.get_info().pgid.pgid, to.shard};
+  state.context.queries[to.osd].emplace(pgid, query);
+}
+
+recovery::Context* Machine::get_context()
+{
+  return &state.context;
+}
+
+}
diff --git a/src/crimson/osd/recovery_machine.h b/src/crimson/osd/recovery_machine.h
new file mode 100644 (file)
index 0000000..fef37cd
--- /dev/null
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <boost/statechart/state_machine.hpp>
+
+#include "osd/osd_types.h"
+
+class PG;
+
+namespace recovery {
+
+struct Initial;
+class State;
+class Context;
+
+struct Machine : public boost::statechart::state_machine<Machine,
+                                                        Initial> {
+  Machine(State& state, PG& pg)
+    : state{state},
+      pg{pg}
+  {}
+  void send_notify(pg_shard_t to,
+                  const pg_notify_t& info,
+                  const PastIntervals& pi);
+  void send_query(pg_shard_t to,
+                 const pg_query_t& query);
+  recovery::Context* get_context();
+  State& state;
+  PG& pg;
+};
+}
diff --git a/src/crimson/osd/recovery_state.cc b/src/crimson/osd/recovery_state.cc
new file mode 100644 (file)
index 0000000..b05b431
--- /dev/null
@@ -0,0 +1,20 @@
+#include "recovery_state.h"
+#include "recovery_states.h"
+#include "messages/MOSDPGLog.h" // MLogRec needs this
+
+namespace recovery {
+
+State::State(PG& pg)
+  : machine{*this, pg}
+{
+    machine.initiate();
+}
+
+Context State::handle_event(const boost::statechart::event_base& evt)
+{
+  machine.process_event(evt);
+  Context pending;
+  std::swap(pending, context);
+  return pending;
+}
+}
diff --git a/src/crimson/osd/recovery_state.h b/src/crimson/osd/recovery_state.h
new file mode 100644 (file)
index 0000000..67382db
--- /dev/null
@@ -0,0 +1,43 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <vector>
+#include <map>
+
+#include "recovery_machine.h"
+
+class PG;
+
+namespace recovery {
+
+// RecoveryMachine::handle_event() could send multiple notifications to a
+// certain peer OSD before it reaches the last state. for better performance,
+// we send them in batch. the pending messages are collected in RecoveryCtx
+// before being dispatched upon returning of handle_event().
+struct Context
+{
+  using osd_id_t = int;
+
+  using notify_t = std::pair<pg_notify_t, PastIntervals>;
+  std::map<osd_id_t, std::vector<notify_t>> notifies;
+
+  using queries_t = std::map<spg_t, pg_query_t>;
+  std::map<osd_id_t, queries_t> queries;
+
+  using infos_t = std::vector<pair<pg_notify_t, PastIntervals>>;
+  std::map<osd_id_t, infos_t> infos;
+};
+
+/// Encapsulates PG recovery process,
+class State {
+public:
+  explicit State(PG& pg);
+  Context handle_event(const boost::statechart::event_base& evt);
+private:
+  friend class Machine;
+  Machine machine;
+  Context context;
+};
+}
diff --git a/src/crimson/osd/recovery_states.cc b/src/crimson/osd/recovery_states.cc
new file mode 100644 (file)
index 0000000..0d2bd34
--- /dev/null
@@ -0,0 +1,647 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "recovery_states.h"
+
+#include "crimson/common/log.h"
+#include "messages/MOSDPGLog.h"
+#include "osd/OSDMap.h"
+#include "pg.h"
+
+namespace {
+  seastar::logger& logger() {
+    return ceph::get_logger(ceph_subsys_osd);
+  }
+}
+
+namespace recovery {
+
+/*------Crashed-------*/
+Crashed::Crashed(my_context ctx)
+  : my_base(ctx)
+{
+  ceph_abort_msg("we got a bad state machine event");
+}
+
+/*------Initial-------*/
+
+Initial::Initial(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Initial");
+}
+
+boost::statechart::result Initial::react(const MNotifyRec& notify)
+{
+  logger().info("Active <MNotifyRec>");
+  // todo: process info from replica
+  auto& pg = context<Machine>().pg;
+  pg.update_last_peering_reset();
+  return transit<Primary>();
+}
+
+boost::statechart::result Initial::react(const MInfoRec& i)
+{
+  // todo
+  logger().info("<MInfoRec> transitioning from Initial to Stray");
+  post_event(i);
+  return transit<Stray>();
+}
+
+boost::statechart::result Initial::react(const MLogRec& i)
+{
+  // todo
+  logger().info("<MLogRec> transitioning from Initial to Stray");
+  post_event(i);
+  return transit<Stray>();
+}
+
+void Initial::exit()
+{}
+
+/*--------Reset---------*/
+Reset::Reset(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering Reset");
+  auto& pg = context<Machine>().pg;
+  pg.update_last_peering_reset();
+}
+
+boost::statechart::result Reset::react(const AdvMap& advmap)
+{
+  logger().info("Reset advmap");
+  auto& pg = context<Machine>().pg;
+  if (pg.should_restart_peering(advmap.up_primary,
+                               advmap.acting_primary,
+                               advmap.new_up,
+                               advmap.new_acting,
+                               advmap.last_map,
+                               advmap.osdmap)) {
+    pg.start_peering_interval(advmap.up_primary,
+                             advmap.acting_primary,
+                             advmap.new_up,
+                             advmap.new_acting,
+                             advmap.last_map);
+  }
+  return discard_event();
+}
+
+boost::statechart::result Reset::react(const ActMap&)
+{
+  auto& pg = context<Machine>().pg;
+  if (pg.should_send_notify()) {
+    context<Machine>().send_notify(pg.get_primary(),
+                                  pg.get_notify(pg.get_osdmap_epoch()),
+                                  pg.get_past_intervals());
+  }
+  return transit<Started>();
+}
+
+void Reset::exit()
+{
+  logger().info("Leaving Reset");
+}
+
+/*------Started-------*/
+Started::Started(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering Started");
+}
+
+boost::statechart::result Started::react(const AdvMap& advmap)
+{
+  auto& pg = context<Machine>().pg;
+  logger().info("Started <AdvMap>");
+  if (pg.should_restart_peering(advmap.up_primary,
+                               advmap.acting_primary,
+                               advmap.new_up,
+                               advmap.new_acting,
+                               advmap.last_map,
+                               advmap.osdmap)) {
+    logger().info("should_restart_peering, transitioning to Reset");
+    post_event(advmap);
+    return transit<Reset>();
+  }
+  return discard_event();
+}
+
+void Started::exit()
+{
+  logger().info("Leaving Started");
+}
+
+/*-------Start---------*/
+Start::Start(my_context ctx)
+  : my_base(ctx)
+{
+  auto& pg = context<Machine>().pg;
+  if (pg.is_primary()) {
+    logger().info("Start transitioning to Primary");
+    post_event(MakePrimary{});
+  } else { // is_stray
+    logger().info("Start transitioning to Stray");
+    post_event(MakeStray{});
+  }
+}
+
+void Start::exit()
+{
+  logger().info("Leaving Start");
+}
+
+/*---------Primary--------*/
+Primary::Primary(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering Primary");
+}
+
+boost::statechart::result Primary::react(const MNotifyRec& notevt)
+{
+  // todo
+  auto& pg = context<Machine>().pg;
+  pg.proc_replica_info(notevt.from,
+                      notevt.notify.info,
+                      notevt.notify.epoch_sent);
+  logger().info("Primary <MNotifyRec> {}", pg);
+  return discard_event();
+}
+
+boost::statechart::result Primary::react(const ActMap&)
+{
+  // todo
+  auto& pg = context<Machine>().pg;
+  logger().info("Primary <ActMap> {}", pg);
+  return discard_event();
+}
+
+void Primary::exit()
+{
+  auto& pg = context<Machine>().pg;
+  pg.clear_primary_state();
+  pg.clear_state(PG_STATE_CREATING);
+  logger().info("Leaving Primary: {}", pg);
+}
+
+/*---------Peering--------*/
+Peering::Peering(my_context ctx)
+  : my_base(ctx)
+{
+  auto& pg = context<Machine>().pg;
+  pg.set_state(PG_STATE_PEERING);
+  logger().info("Entering Peering");
+}
+
+boost::statechart::result Peering::react(const AdvMap& advmap)
+{
+  logger().info("Peering <AdvMap>");
+  context<Machine>().pg.update_need_up_thru(advmap.osdmap.get());
+  return forward_event();
+}
+
+void Peering::exit()
+{
+  auto& pg = context<Machine>().pg;
+  logger().info("Leaving Peering: {}", pg);
+  pg.clear_state(PG_STATE_PEERING);
+}
+
+/*--------GetInfo---------*/
+GetInfo::GetInfo(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering GetInfo");
+  context<Machine>().pg.update_need_up_thru();
+  post_event(GotInfo{});
+}
+
+boost::statechart::result GetInfo::react(const MNotifyRec& infoevt)
+{
+  logger().info("GetInfo <MNotifyRec>");
+  // todo: depends on get_infos()
+  post_event(GotInfo());
+  return discard_event();
+}
+
+void GetInfo::exit()
+{
+  logger().info("Leaving GetInfo");
+  // todo
+}
+
+/*------GetLog------------*/
+GetLog::GetLog(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering GetLog");
+  auto& pg = context<Machine>().pg;
+
+  PG::choose_acting_t adjust_acting;
+  tie(adjust_acting, auth_log_shard) = pg.choose_acting();
+  switch (adjust_acting) {
+  case PG::choose_acting_t::dont_change:
+    break;
+  case PG::choose_acting_t::should_change:
+    // post_event(NeedActingChange());
+    return;
+  case PG::choose_acting_t::pg_incomplete:
+    // post_event(IsIncomplete());
+    return;
+  }
+  // am i the best?
+  if (auth_log_shard == pg.get_whoami()) {
+    post_event(GotLog{});
+    return;
+  } else {
+    // todo: request log from peer
+    return;
+  }
+}
+
+boost::statechart::result GetLog::react(const AdvMap& advmap)
+{
+  // make sure our log source didn't go down.  we need to check
+  // explicitly because it may not be part of the prior set, which
+  // means the Peering state check won't catch it going down.
+  if (!advmap.osdmap->is_up(auth_log_shard.osd)) {
+    logger().info("GetLog: auth_log_shard osd.{} went down",
+                 auth_log_shard.osd);
+    post_event(advmap);
+    return transit<Reset>();
+  }
+
+  // let the Peering state do its checks.
+  return forward_event();
+}
+
+boost::statechart::result GetLog::react(const MLogRec& logevt)
+{
+  assert(!msg);
+  if (logevt.from != auth_log_shard) {
+    logger().info("GetLog: discarding log from non-auth_log_shard osd.{}",
+                 logevt.from);
+    return discard_event();
+  }
+  logger().info("GetLog: received master log from osd.{}",
+               logevt.from);
+  msg = logevt.msg;
+  post_event(GotLog{});
+  return discard_event();
+}
+
+boost::statechart::result GetLog::react(const GotLog&)
+{
+  logger().info("leaving GetLog");
+  return transit<GetMissing>();
+}
+
+void GetLog::exit()
+{}
+
+Recovered::Recovered(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering Recovered");
+  if (context<Active>().all_replicas_activated) {
+    logger().info("all_replicas_activated");
+    post_event(GoClean{});
+  }
+}
+
+boost::statechart::result Recovered::react(const AllReplicasActivated&)
+{
+  logger().info("Recovered <AllReplicasActivated>");
+  post_event(GoClean{});
+  return forward_event();
+}
+
+void Recovered::exit()
+{
+  logger().info("Leaving Recovered");
+}
+
+Clean::Clean(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering Clean");
+  auto& pg = context<Machine>().pg;
+  pg.maybe_mark_clean();
+}
+
+void Clean::exit()
+{
+  logger().info("Leaving Clean");
+  auto& pg = context<Machine>().pg;
+  pg.clear_state(PG_STATE_CLEAN);
+}
+
+/*---------Active---------*/
+Active::Active(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering Activate");
+  auto& pg = context<Machine>().pg;
+  assert(pg.is_primary());
+  pg.activate(pg.get_osdmap_epoch());
+  logger().info("Activate Finished");
+}
+
+boost::statechart::result Active::react(const AdvMap& advmap)
+{
+  auto& pg = context<Machine>().pg;
+  if (pg.should_restart_peering(advmap.up_primary,
+                               advmap.acting_primary,
+                               advmap.new_up,
+                               advmap.new_acting,
+                               advmap.last_map,
+                               advmap.osdmap)) {
+    logger().info("Active advmap interval change, fast return");
+    return forward_event();
+  }
+  logger().info("Active advmap");
+  return forward_event();
+}
+
+boost::statechart::result Active::react(const ActMap&)
+{
+  return forward_event();
+}
+
+boost::statechart::result Active::react(const MNotifyRec& notevt)
+{
+  logger().info("Active <MNotifyRec>");
+  auto& pg = context<Machine>().pg;
+  pg.proc_replica_info(notevt.from,
+                      notevt.notify.info,
+                      notevt.notify.epoch_sent);
+  return discard_event();
+}
+
+boost::statechart::result Active::react(const MInfoRec& infoevt)
+{
+  logger().info("Active <MInfoRec>");
+  auto& pg = context<Machine>().pg;
+  assert(pg.is_primary());
+
+  if (pg.is_last_activated_peer(infoevt.from)) {
+    post_event(AllReplicasActivated{});
+  }
+  return discard_event();
+}
+
+boost::statechart::result Active::react(const MLogRec& logevt)
+{
+  logger().info("Active <MLogRec>");
+  auto& pg = context<Machine>().pg;
+  pg.proc_replica_log(logevt.from,
+                     logevt.msg->info,
+                     logevt.msg->log,
+                     logevt.msg->missing);
+  // todo
+  return discard_event();
+}
+
+boost::statechart::result Active::react(const AllReplicasActivated &evt)
+{
+  logger().info("Active <AllReplicasActivated>");
+  all_replicas_activated = true;
+  auto& pg = context<Machine>().pg;
+  pg.clear_state(PG_STATE_ACTIVATING);
+  pg.clear_state(PG_STATE_CREATING);
+  pg.on_activated();
+  pg.share_pg_info();
+  post_event(AllReplicasRecovered{});
+  return discard_event();
+}
+
+void Active::exit()
+{
+  auto& pg = context<Machine>().pg;
+  pg.clear_state(PG_STATE_ACTIVATING);
+  pg.clear_state(PG_STATE_DEGRADED);
+  pg.clear_state(PG_STATE_UNDERSIZED);
+
+  logger().info("Leaving Active");
+}
+
+/*------Activating--------*/
+Activating::Activating(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering Activating");
+}
+
+void Activating::exit()
+{
+  logger().info("Leaving Activating");
+}
+
+/*------ReplicaActive-----*/
+ReplicaActive::ReplicaActive(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering ReplicaActive");
+}
+
+
+boost::statechart::result ReplicaActive::react(const Activate& actevt)
+{
+  auto& pg = context<Machine>().pg;
+  logger().info("In ReplicaActive, about to call activate");
+  pg.activate(actevt.activation_epoch);
+  logger().info("Activate Finished");
+  return discard_event();
+}
+
+boost::statechart::result ReplicaActive::react(const MInfoRec& infoevt)
+{
+  return discard_event();
+}
+
+boost::statechart::result ReplicaActive::react(const MLogRec& logevt)
+{
+  return discard_event();
+}
+
+boost::statechart::result ReplicaActive::react(const ActMap&)
+{
+  auto& pg = context<Machine>().pg;
+  if (pg.should_send_notify()) {
+    context<Machine>().send_notify(pg.get_primary(),
+                                  pg.get_notify(pg.get_osdmap_epoch()),
+                                  pg.get_past_intervals());
+  };
+  return discard_event();
+}
+
+boost::statechart::result ReplicaActive::react(const MQuery& query)
+{
+  auto& pg = context<Machine>().pg;
+  context<Machine>().send_notify(query.from,
+                                pg.get_notify(query.query_epoch),
+                                pg.get_past_intervals());
+  return discard_event();
+}
+
+void ReplicaActive::exit()
+{}
+
+/*---RepNotRecovering----*/
+RepNotRecovering::RepNotRecovering(my_context ctx)
+  : my_base(ctx)
+{}
+
+void RepNotRecovering::exit()
+{}
+
+/*-------Stray---*/
+Stray::Stray(my_context ctx)
+  : my_base(ctx)
+{
+  auto& pg = context<Machine>().pg;
+  assert(!pg.is_primary());
+  logger().info("{}, Entering Stray", pg);
+}
+
+boost::statechart::result Stray::react(const MLogRec& logevt)
+{
+  auto& pg = context<Machine>().pg;
+  logger().info("{} Stray <MLogRec>", pg);
+  MOSDPGLog* msg = logevt.msg.get();
+  logger().info("got info+log from osd.{} {} {}",
+                logevt.from, msg->info, msg->log);
+  if (msg->info.last_backfill == hobject_t()) {
+    // todo: restart backfill
+  } else {
+    // todo: merge log
+  }
+  post_event(Activate{logevt.msg->info.last_epoch_started});
+  return transit<ReplicaActive>();
+}
+
+boost::statechart::result Stray::react(const MInfoRec& infoevt)
+{
+  auto& pg = context<Machine>().pg;
+  logger().info("{} Stray <MInfoRec>", pg);
+  logger().info("got info from osd.{} {}",
+                infoevt.from, infoevt.info);
+  // todo
+  post_event(Activate{infoevt.info.last_epoch_started});
+  return transit<ReplicaActive>();
+}
+
+boost::statechart::result Stray::react(const MQuery& query)
+{
+  auto& pg = context<Machine>().pg;
+  logger().info("{} Stray <MQuery>", pg);
+  pg.reply_pg_query(query, context<Machine>().get_context());
+  return discard_event();
+}
+
+boost::statechart::result Stray::react(const ActMap&)
+{
+  auto& pg = context<Machine>().pg;
+  logger().info("{} Stray <ActMap>", pg);
+  if (pg.should_send_notify()) {
+    context<Machine>().send_notify(pg.get_primary(),
+                                  pg.get_notify(pg.get_osdmap_epoch()),
+                                  pg.get_past_intervals());
+  }
+  return discard_event();
+}
+
+void Stray::exit()
+{
+  logger().info("Leaving Stray");
+}
+
+/*------Down--------*/
+Down::Down(my_context ctx)
+  : my_base(ctx)
+{}
+
+void Down::exit()
+{
+}
+
+boost::statechart::result Down::react(const MNotifyRec& infoevt)
+{
+  // todo
+  return discard_event();
+}
+
+/*------GetMissing--------*/
+GetMissing::GetMissing(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering GetMissing");
+  auto& pg = context<Machine>().pg;
+  if (pg.get_need_up_thru() != 0) {
+    logger().info("still need up_thru update before going active");
+    post_event(NeedUpThru{});
+  } else {
+    // all good!
+    post_event(Activate{pg.get_osdmap_epoch()});
+  }
+}
+
+boost::statechart::result GetMissing::react(const MLogRec& logevt)
+{
+  logger().info("GetMissing <MLogRec>");
+  auto& pg = context<Machine>().pg;
+  pg.proc_replica_log(logevt.from,
+                     logevt.msg->info,
+                     logevt.msg->log,
+                     logevt.msg->missing);
+  if (pg.get_need_up_thru() != 0) {
+    logger().info(" still need up_thru update before going active");
+    post_event(NeedUpThru{});
+  } else {
+    logger().info("Got last missing, don't need missing posting Activate");
+    post_event(Activate{pg.get_osdmap_epoch()});
+  }
+  return discard_event();
+}
+
+void GetMissing::exit()
+{
+  logger().info("Leaving GetMissing");
+}
+
+/*------WaitUpThru--------*/
+WaitUpThru::WaitUpThru(my_context ctx)
+  : my_base(ctx)
+{
+  logger().info("Entering WaitUpThru");
+}
+
+boost::statechart::result WaitUpThru::react(const ActMap& am)
+{
+  logger().info("WaitUpThru <ActMap>");
+  auto& pg = context<Machine>().pg;
+  if (!pg.get_need_up_thru()) {
+    logger().info("WaitUpThru: no need up thru!");
+    post_event(Activate{pg.get_osdmap_epoch()});
+  }
+  return forward_event();
+}
+
+boost::statechart::result WaitUpThru::react(const MLogRec& logevt)
+{
+  logger().info("WaitUpThru: <MLogRec>");
+  auto& pg = context<Machine>().pg;
+  pg.proc_replica_log(logevt.from,
+                     logevt.msg->info,
+                     logevt.msg->log,
+                     logevt.msg->missing);
+  return discard_event();
+}
+
+void WaitUpThru::exit()
+{
+  logger().info("Leaving WaitUpThru");
+}
+
+}
diff --git a/src/crimson/osd/recovery_states.h b/src/crimson/osd/recovery_states.h
new file mode 100644 (file)
index 0000000..b363f9f
--- /dev/null
@@ -0,0 +1,335 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <boost/statechart/custom_reaction.hpp>
+#include <boost/statechart/event.hpp>
+#include <boost/statechart/event_base.hpp>
+#include <boost/statechart/simple_state.hpp>
+#include <boost/statechart/state.hpp>
+#include <boost/statechart/transition.hpp>
+
+#include "recovery_machine.h"
+#include "recovery_events.h"
+
+// Initial
+// Reset
+// Start
+//   Started
+//     Primary
+//       WaitActingChange
+//       Peering
+//         GetInfo
+//         GetLog
+//         GetMissing
+//         WaitUpThru
+//         Incomplete
+//       Active
+//         Activating
+//         Clean
+//         Recovered
+//         Backfilling
+//         WaitRemoteBackfillReserved
+//         WaitLocalBackfillReserved
+//         NotBackfilling
+//         NotRecovering
+//         Recovering
+//         WaitRemoteRecoveryReserved
+//         WaitLocalRecoveryReserved
+//     ReplicaActive
+//       RepNotRecovering
+//       RepRecovering
+//       RepWaitBackfillReserved
+//       RepWaitRecoveryReserved
+//     Stray
+//     ToDelete
+//       WaitDeleteReserved
+//       Deleting
+// Crashed
+
+namespace recovery {
+
+struct Crashed : boost::statechart::state<Crashed, Machine> {
+  explicit Crashed(my_context ctx);
+};
+
+struct Reset;
+
+struct Initial : boost::statechart::state<Initial, Machine> {
+  explicit Initial(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::transition<Initialize, Reset>,
+    boost::statechart::custom_reaction<NullEvt>,
+    boost::statechart::transition<boost::statechart::event_base, Crashed>
+    >;
+
+  boost::statechart::result react(const MNotifyRec&);
+  boost::statechart::result react(const MInfoRec&);
+  boost::statechart::result react(const MLogRec&);
+  boost::statechart::result react(const boost::statechart::event_base&) {
+    return discard_event();
+  }
+};
+
+struct Reset : boost::statechart::state<Reset, Machine> {
+  explicit Reset(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction<AdvMap>,
+    boost::statechart::custom_reaction<ActMap>,
+    boost::statechart::custom_reaction<NullEvt>,
+    boost::statechart::transition<boost::statechart::event_base, Crashed>
+    >;
+  boost::statechart::result react(const AdvMap&);
+  boost::statechart::result react(const ActMap&);
+  boost::statechart::result react(const boost::statechart::event_base&) {
+    return discard_event();
+  }
+};
+
+struct Start;
+
+struct Started : boost::statechart::state<Started, Machine, Start> {
+  explicit Started(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction<AdvMap>,
+    // ignored
+    boost::statechart::custom_reaction<NullEvt>,
+    // crash
+    boost::statechart::transition<boost::statechart::event_base, Crashed>
+    >;
+  boost::statechart::result react(const AdvMap&);
+  boost::statechart::result react(const boost::statechart::event_base&) {
+    return discard_event();
+  }
+};
+
+struct Primary;
+struct Stray;
+
+struct MakePrimary : boost::statechart::event<MakePrimary> {};
+struct MakeStray : boost::statechart::event<MakeStray> {};
+
+struct Start : boost::statechart::state<Start, Started> {
+  explicit Start(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::transition<MakePrimary, Primary>,
+    boost::statechart::transition<MakeStray, Stray>
+    >;
+};
+
+struct Peering;
+struct WaitActingChange;
+
+struct Primary : boost::statechart::state<Primary, Started, Peering> {
+  explicit Primary(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction<ActMap>,
+    boost::statechart::custom_reaction<MNotifyRec>
+    >;
+  boost::statechart::result react(const ActMap&);
+  boost::statechart::result react(const MNotifyRec&);
+};
+
+struct GetInfo;
+struct Active;
+
+struct Peering : boost::statechart::state<Peering, Primary, GetInfo> {
+  PastIntervals::PriorSet prior_set;
+  /// need osd_find_best_info_ignore_history_les
+  bool history_les_bound = false;
+
+  explicit Peering(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::transition<Activate, Active>,
+    boost::statechart::custom_reaction<AdvMap>
+    >;
+  boost::statechart::result react(const AdvMap &advmap);
+};
+
+struct Activating;
+
+struct AllReplicasActivated : boost::statechart::event<AllReplicasActivated> {};
+
+struct Active : boost::statechart::state<Active, Primary, Activating> {
+  explicit Active(my_context ctx);
+  void exit();
+
+  bool all_replicas_activated = false;
+
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction< ActMap >,
+    boost::statechart::custom_reaction< AdvMap >,
+    boost::statechart::custom_reaction< MInfoRec >,
+    boost::statechart::custom_reaction< MNotifyRec >,
+    boost::statechart::custom_reaction< MLogRec >,
+    boost::statechart::custom_reaction< AllReplicasActivated >
+    >;
+  boost::statechart::result react(const ActMap&);
+  boost::statechart::result react(const AdvMap&);
+  boost::statechart::result react(const MInfoRec& infoevt);
+  boost::statechart::result react(const MNotifyRec& notevt);
+  boost::statechart::result react(const MLogRec& logevt);
+  boost::statechart::result react(const AllReplicasActivated&);
+};
+
+struct GoClean : boost::statechart::event<GoClean> {};
+
+struct Clean : boost::statechart::state<Clean, Active> {
+  explicit Clean(my_context ctx);
+  void exit();
+  boost::statechart::result react(const boost::statechart::event_base&) {
+    return discard_event();
+  }
+};
+
+struct Recovered : boost::statechart::state<Recovered, Active> {
+  using reactions = boost::mpl::list<
+    boost::statechart::transition<GoClean, Clean>,
+    boost::statechart::custom_reaction<AllReplicasActivated>
+    >;
+  explicit Recovered(my_context ctx);
+  void exit();
+  boost::statechart::result react(const AllReplicasActivated&);
+};
+
+struct AllReplicasRecovered : boost::statechart::event<AllReplicasRecovered>
+{};
+
+struct Activating : boost::statechart::state<Activating, Active> {
+  using reactions = boost::mpl::list <
+    boost::statechart::transition< AllReplicasRecovered, Recovered >
+    >;
+  explicit Activating(my_context ctx);
+  void exit();
+};
+
+struct RepNotRecovering;
+
+struct ReplicaActive : boost::statechart::state<ReplicaActive, Started, RepNotRecovering> {
+  explicit ReplicaActive(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction<ActMap>,
+    boost::statechart::custom_reaction<MQuery>,
+    boost::statechart::custom_reaction<MInfoRec>,
+    boost::statechart::custom_reaction<MLogRec>,
+    boost::statechart::custom_reaction<Activate>
+    >;
+  boost::statechart::result react(const MInfoRec& infoevt);
+  boost::statechart::result react(const MLogRec& logevt);
+  boost::statechart::result react(const ActMap&);
+  boost::statechart::result react(const MQuery&);
+  boost::statechart::result react(const Activate&);
+};
+
+struct RepNotRecovering : boost::statechart::state<RepNotRecovering, ReplicaActive> {
+  explicit RepNotRecovering(my_context ctx);
+  void exit();
+};
+
+struct Stray : boost::statechart::state<Stray, Started> {
+  explicit Stray(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction<MQuery>,
+    boost::statechart::custom_reaction<MLogRec>,
+    boost::statechart::custom_reaction<MInfoRec>,
+    boost::statechart::custom_reaction<ActMap>
+    >;
+  boost::statechart::result react(const MQuery& query);
+  boost::statechart::result react(const MLogRec& logevt);
+  boost::statechart::result react(const MInfoRec& infoevt);
+  boost::statechart::result react(const ActMap&);
+};
+
+struct GetLog;
+struct Down;
+
+struct GotInfo : boost::statechart::event<GotInfo> {};
+struct IsDown : boost::statechart::event<IsDown> {};
+
+struct GetInfo : boost::statechart::state<GetInfo, Peering> {
+  std::set<pg_shard_t> peer_info_requested;
+
+  explicit GetInfo(my_context ctx);
+  void exit();
+  void get_infos();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::transition<GotInfo, GetLog>,
+    boost::statechart::custom_reaction<MNotifyRec>,
+    boost::statechart::transition<IsDown, Down>
+    >;
+  boost::statechart::result react(const MNotifyRec& infoevt);
+};
+
+struct GotLog : boost::statechart::event<GotLog> {};
+
+struct GetLog : boost::statechart::state<GetLog, Peering> {
+  pg_shard_t auth_log_shard;
+  boost::intrusive_ptr<MOSDPGLog> msg;
+
+  explicit GetLog(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction<MLogRec>,
+    boost::statechart::custom_reaction<GotLog>,
+    boost::statechart::custom_reaction<AdvMap>
+    >;
+  boost::statechart::result react(const AdvMap&);
+  boost::statechart::result react(const MLogRec& logevt);
+  boost::statechart::result react(const GotLog&);
+};
+
+struct NeedUpThru : boost::statechart::event<NeedUpThru> {};
+struct WaitUpThru;
+
+struct GetMissing : boost::statechart::state<GetMissing, Peering> {
+  explicit GetMissing(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction<MLogRec>,
+    boost::statechart::transition<NeedUpThru, WaitUpThru>
+    >;
+  boost::statechart::result react(const MLogRec& logevt);
+};
+
+struct WaitUpThru : boost::statechart::state<WaitUpThru, Peering> {
+  explicit WaitUpThru(my_context ctx);
+  void exit();
+
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction<ActMap>,
+    boost::statechart::custom_reaction<MLogRec>
+    >;
+  boost::statechart::result react(const ActMap& am);
+  boost::statechart::result react(const MLogRec& logrec);
+};
+
+struct Down : boost::statechart::state<Down, Peering> {
+  explicit Down(my_context ctx);
+  using reactions = boost::mpl::list <
+    boost::statechart::custom_reaction<MNotifyRec>
+    >;
+  boost::statechart::result react(const MNotifyRec& infoevt);
+  void exit();
+};
+
+}