osd.cc
osd_meta.cc
pg.cc
- pg_meta.cc)
+ pg_meta.cc
+ recovery_machine.cc
+ recovery_state.cc
+ recovery_states.cc)
target_link_libraries(crimson-osd
crimson-common crimson-os crimson fmt::fmt)
#include "messages/MOSDPGInfo.h"
#include "messages/MOSDPGLog.h"
+#include "messages/MOSDPGNotify.h"
+#include "messages/MOSDPGQuery.h"
+
#include "osd/OSDMap.h"
+#include "crimson/net/Connection.h"
+#include "crimson/net/Messenger.h"
#include "crimson/os/cyan_store.h"
#include "crimson/osd/pg_meta.h"
+#include "recovery_events.h"
+#include "recovery_state.h"
+
namespace {
seastar::logger& logger() {
return ceph::get_logger(ceph_subsys_osd);
}
+ template<typename Message, typename... Args>
+ Ref<Message> make_message(Args&&... args)
+ {
+ return {new Message{std::forward<Args>(args)...}, false};
+ }
}
+using recovery::AdvMap;
+using recovery::ActMap;
+using recovery::Initialize;
+
PG::PG(spg_t pgid,
pg_shard_t pg_shard,
pg_pool_t&& pool,
: pgid{pgid},
whoami{pg_shard},
pool{std::move(pool)},
+ recovery_state{*this},
info{pgid},
osdmap{osdmap},
msgr{msgr}
info.stats.acting = acting;
info.stats.acting_primary = primary.osd;
info.stats.mapping_epoch = info.history.same_interval_since;
+ recovery_state.handle_event(Initialize{});
+ // note: we don't activate here because we know the OSD will advance maps
+ // during boot.
return seastar::now();
});
}
seastar::future<> PG::do_peering_event(std::unique_ptr<PGPeeringEvent> evt)
{
- // todo
- return seastar::now();
+ return dispatch_context(recovery_state.handle_event(evt->get_event()));
+}
+
+seastar::future<> PG::dispatch_context(recovery::Context&& ctx)
+{
+ return seastar::do_with(recovery::Context{ctx}, [this](auto& todo) {
+ return seastar::when_all_succeed(
+ seastar::parallel_for_each(std::move(todo.notifies),
+ [this](auto& osd_notifies) {
+ auto& [peer, notifies] = osd_notifies;
+ auto m = make_message<MOSDPGNotify>(get_osdmap_epoch(),
+ std::move(notifies));
+ return send_to_osd(peer, m, get_osdmap_epoch());
+ }),
+ seastar::parallel_for_each(std::move(todo.queries),
+ [this](auto& osd_queries) {
+ auto& [peer, queries] = osd_queries;
+ auto m = make_message<MOSDPGQuery>(get_osdmap_epoch(),
+ std::move(queries));
+ return send_to_osd(peer, m, get_osdmap_epoch());
+ }),
+ seastar::parallel_for_each(std::move(todo.infos),
+ [this](auto& osd_infos) {
+ auto& [peer, infos] = osd_infos;
+ auto m = make_message<MOSDPGInfo>(get_osdmap_epoch(),
+ std::move(infos));
+ return send_to_osd(peer, m, get_osdmap_epoch());
+ })
+ );
+ });
}
seastar::future<> PG::handle_advance_map(cached_map_t next_map)
{
- // todo
+ auto last_map = std::move(osdmap);
+ osdmap = std::move(next_map);
+ vector<int> new_up, new_acting;
+ int up_primary, acting_primary;
+ osdmap->pg_to_up_acting_osds(pgid.pgid,
+ &new_up,
+ &up_primary,
+ &new_acting,
+ &acting_primary);
+ logger().info("handle_advance_map {}/{} -- {}/{}",
+ new_up, new_acting, up_primary, acting_primary);
+ recovery_state.handle_event(AdvMap{osdmap, last_map,
+ std::move(new_up), up_primary,
+ std::move(new_acting), acting_primary});
return seastar::now();
}
seastar::future<> PG::handle_activate_map()
{
- // todo
+ recovery_state.handle_event(ActMap{});
return seastar::now();
}
return os;
}
+void PG::reply_pg_query(const MQuery& query, recovery::Context* ctx)
+{
+ switch (query.query.type) {
+ case pg_query_t::INFO:
+ return reply_pg_query_for_info(query, ctx);
+ case pg_query_t::LOG:
+ return reply_pg_query_for_log(query, false);
+ case pg_query_t::FULLLOG:
+ return reply_pg_query_for_log(query, true);
+ }
+}
+
+void PG::reply_pg_query_for_info(const MQuery& query, recovery::Context* ctx)
+{
+ recovery::Context::notify_t notify{pg_notify_t{query.from.shard,
+ whoami.shard,
+ query.query_epoch,
+ get_osdmap_epoch(),
+ info},
+ past_intervals};
+ ctx->notifies[query.from.osd].push_back(std::move(notify));
+}
+
+void PG::reply_pg_query_for_log(const MQuery& query, bool full)
+{
+ auto m = make_message<MOSDPGLog>(query.from.shard,
+ whoami.shard,
+ get_osdmap_epoch(),
+ info,
+ query.query_epoch);
+ // todo:
+ // m->missing = pg_log.get_missing();
+ if (full) {
+ // m->log = pg_log.get_log();
+ } else {
+ // maybe partial
+ }
+ send_to_osd(query.from.osd, m, get_osdmap_epoch());
+}
+
seastar::future<> PG::send_to_osd(int peer, Ref<Message> m, epoch_t from_epoch)
{
if (osdmap->is_down(peer) || osdmap->get_info(peer).up_from > from_epoch) {
#include <seastar/core/future.hh>
#include "osd/osd_types.h"
+#include "recovery_state.h"
template<typename T> using Ref = boost::intrusive_ptr<T>;
class OSDMap;
+class MQuery;
class PGPeeringEvent;
+namespace recovery {
+ class Context;
+}
namespace ceph::net {
class Messenger;
void maybe_mark_clean();
seastar::future<> do_peering_event(std::unique_ptr<PGPeeringEvent> evt);
+ seastar::future<> dispatch_context(recovery::Context&& ctx);
seastar::future<> handle_advance_map(cached_map_t next_map);
seastar::future<> handle_activate_map();
seastar::future<> share_pg_info();
+ void reply_pg_query(const MQuery& query, recovery::Context* ctx);
void print(ostream& os) const;
private:
seastar::future<> activate_peer(pg_shard_t peer);
+ void reply_pg_query_for_info(const MQuery& query, recovery::Context* ctx);
+ void reply_pg_query_for_log(const MQuery& query, bool full);
seastar::future<> send_to_osd(int peer, Ref<Message> m, epoch_t from_epoch);
+
void update_primary_state(const std::vector<int>& new_up,
int new_up_primary,
const std::vector<int>& new_acting,
epoch_t last_peering_reset = 0;
epoch_t need_up_thru = 0;
+ recovery::State recovery_state;
bool should_notify_primary = false;
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <vector>
+
+#include <boost/smart_ptr/local_shared_ptr.hpp>
+#include <boost/statechart/custom_reaction.hpp>
+#include <boost/statechart/event.hpp>
+#include <boost/statechart/simple_state.hpp>
+#include <boost/statechart/state.hpp>
+#include <boost/statechart/state_machine.hpp>
+#include <boost/statechart/transition.hpp>
+#include <boost/statechart/event_base.hpp>
+
+#include "osd/PGPeeringEvent.h"
+
+namespace recovery {
+
+struct AdvMap : boost::statechart::event< AdvMap > {
+ // TODO: use foreign_ptr<> once the osdmap cache needs to be shared across
+ // cores
+ using OSDMapRef = boost::local_shared_ptr<OSDMap>;
+ OSDMapRef osdmap;
+ OSDMapRef last_map;
+ std::vector<int> new_up, new_acting;
+ int up_primary, acting_primary;
+ AdvMap(OSDMapRef osdmap, OSDMapRef last_map,
+ const std::vector<int>& new_up, int up_primary,
+ const std::vector<int>& new_acting, int acting_primary):
+ osdmap(osdmap),
+ last_map(last_map),
+ new_up(new_up),
+ new_acting(new_acting),
+ up_primary(up_primary),
+ acting_primary(acting_primary) {}
+ void print(std::ostream *out) const {
+ *out << "AdvMap";
+ }
+};
+
+struct ActMap : boost::statechart::event< ActMap > {
+ ActMap() : boost::statechart::event< ActMap >() {}
+ void print(std::ostream *out) const {
+ *out << "ActMap";
+ }
+};
+
+struct Activate : boost::statechart::event< Activate > {
+ epoch_t activation_epoch;
+ explicit Activate(epoch_t q) : boost::statechart::event< Activate >(),
+ activation_epoch(q) {}
+ void print(std::ostream *out) const {
+ *out << "Activate from " << activation_epoch;
+ }
+};
+
+struct Initialize : boost::statechart::event<Initialize> {};
+
+}
--- /dev/null
+#include "recovery_machine.h"
+#include "recovery_state.h"
+#include "pg.h"
+
+namespace recovery
+{
+void Machine::send_notify(pg_shard_t to,
+ const pg_notify_t& info,
+ const PastIntervals& pi)
+{
+ state.context.notifies[to.osd].emplace_back(info, pi);
+}
+
+void Machine::send_query(pg_shard_t to,
+ const pg_query_t& query)
+{
+ spg_t pgid{pg.get_info().pgid.pgid, to.shard};
+ state.context.queries[to.osd].emplace(pgid, query);
+}
+
+recovery::Context* Machine::get_context()
+{
+ return &state.context;
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <boost/statechart/state_machine.hpp>
+
+#include "osd/osd_types.h"
+
+class PG;
+
+namespace recovery {
+
+struct Initial;
+class State;
+class Context;
+
+struct Machine : public boost::statechart::state_machine<Machine,
+ Initial> {
+ Machine(State& state, PG& pg)
+ : state{state},
+ pg{pg}
+ {}
+ void send_notify(pg_shard_t to,
+ const pg_notify_t& info,
+ const PastIntervals& pi);
+ void send_query(pg_shard_t to,
+ const pg_query_t& query);
+ recovery::Context* get_context();
+ State& state;
+ PG& pg;
+};
+}
--- /dev/null
+#include "recovery_state.h"
+#include "recovery_states.h"
+#include "messages/MOSDPGLog.h" // MLogRec needs this
+
+namespace recovery {
+
+State::State(PG& pg)
+ : machine{*this, pg}
+{
+ machine.initiate();
+}
+
+Context State::handle_event(const boost::statechart::event_base& evt)
+{
+ machine.process_event(evt);
+ Context pending;
+ std::swap(pending, context);
+ return pending;
+}
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <vector>
+#include <map>
+
+#include "recovery_machine.h"
+
+class PG;
+
+namespace recovery {
+
+// RecoveryMachine::handle_event() could send multiple notifications to a
+// certain peer OSD before it reaches the last state. for better performance,
+// we send them in batch. the pending messages are collected in RecoveryCtx
+// before being dispatched upon returning of handle_event().
+struct Context
+{
+ using osd_id_t = int;
+
+ using notify_t = std::pair<pg_notify_t, PastIntervals>;
+ std::map<osd_id_t, std::vector<notify_t>> notifies;
+
+ using queries_t = std::map<spg_t, pg_query_t>;
+ std::map<osd_id_t, queries_t> queries;
+
+ using infos_t = std::vector<pair<pg_notify_t, PastIntervals>>;
+ std::map<osd_id_t, infos_t> infos;
+};
+
+/// Encapsulates PG recovery process,
+class State {
+public:
+ explicit State(PG& pg);
+ Context handle_event(const boost::statechart::event_base& evt);
+private:
+ friend class Machine;
+ Machine machine;
+ Context context;
+};
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "recovery_states.h"
+
+#include "crimson/common/log.h"
+#include "messages/MOSDPGLog.h"
+#include "osd/OSDMap.h"
+#include "pg.h"
+
+namespace {
+ seastar::logger& logger() {
+ return ceph::get_logger(ceph_subsys_osd);
+ }
+}
+
+namespace recovery {
+
+/*------Crashed-------*/
+Crashed::Crashed(my_context ctx)
+ : my_base(ctx)
+{
+ ceph_abort_msg("we got a bad state machine event");
+}
+
+/*------Initial-------*/
+
+Initial::Initial(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Initial");
+}
+
+boost::statechart::result Initial::react(const MNotifyRec& notify)
+{
+ logger().info("Active <MNotifyRec>");
+ // todo: process info from replica
+ auto& pg = context<Machine>().pg;
+ pg.update_last_peering_reset();
+ return transit<Primary>();
+}
+
+boost::statechart::result Initial::react(const MInfoRec& i)
+{
+ // todo
+ logger().info("<MInfoRec> transitioning from Initial to Stray");
+ post_event(i);
+ return transit<Stray>();
+}
+
+boost::statechart::result Initial::react(const MLogRec& i)
+{
+ // todo
+ logger().info("<MLogRec> transitioning from Initial to Stray");
+ post_event(i);
+ return transit<Stray>();
+}
+
+void Initial::exit()
+{}
+
+/*--------Reset---------*/
+Reset::Reset(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering Reset");
+ auto& pg = context<Machine>().pg;
+ pg.update_last_peering_reset();
+}
+
+boost::statechart::result Reset::react(const AdvMap& advmap)
+{
+ logger().info("Reset advmap");
+ auto& pg = context<Machine>().pg;
+ if (pg.should_restart_peering(advmap.up_primary,
+ advmap.acting_primary,
+ advmap.new_up,
+ advmap.new_acting,
+ advmap.last_map,
+ advmap.osdmap)) {
+ pg.start_peering_interval(advmap.up_primary,
+ advmap.acting_primary,
+ advmap.new_up,
+ advmap.new_acting,
+ advmap.last_map);
+ }
+ return discard_event();
+}
+
+boost::statechart::result Reset::react(const ActMap&)
+{
+ auto& pg = context<Machine>().pg;
+ if (pg.should_send_notify()) {
+ context<Machine>().send_notify(pg.get_primary(),
+ pg.get_notify(pg.get_osdmap_epoch()),
+ pg.get_past_intervals());
+ }
+ return transit<Started>();
+}
+
+void Reset::exit()
+{
+ logger().info("Leaving Reset");
+}
+
+/*------Started-------*/
+Started::Started(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering Started");
+}
+
+boost::statechart::result Started::react(const AdvMap& advmap)
+{
+ auto& pg = context<Machine>().pg;
+ logger().info("Started <AdvMap>");
+ if (pg.should_restart_peering(advmap.up_primary,
+ advmap.acting_primary,
+ advmap.new_up,
+ advmap.new_acting,
+ advmap.last_map,
+ advmap.osdmap)) {
+ logger().info("should_restart_peering, transitioning to Reset");
+ post_event(advmap);
+ return transit<Reset>();
+ }
+ return discard_event();
+}
+
+void Started::exit()
+{
+ logger().info("Leaving Started");
+}
+
+/*-------Start---------*/
+Start::Start(my_context ctx)
+ : my_base(ctx)
+{
+ auto& pg = context<Machine>().pg;
+ if (pg.is_primary()) {
+ logger().info("Start transitioning to Primary");
+ post_event(MakePrimary{});
+ } else { // is_stray
+ logger().info("Start transitioning to Stray");
+ post_event(MakeStray{});
+ }
+}
+
+void Start::exit()
+{
+ logger().info("Leaving Start");
+}
+
+/*---------Primary--------*/
+Primary::Primary(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering Primary");
+}
+
+boost::statechart::result Primary::react(const MNotifyRec& notevt)
+{
+ // todo
+ auto& pg = context<Machine>().pg;
+ pg.proc_replica_info(notevt.from,
+ notevt.notify.info,
+ notevt.notify.epoch_sent);
+ logger().info("Primary <MNotifyRec> {}", pg);
+ return discard_event();
+}
+
+boost::statechart::result Primary::react(const ActMap&)
+{
+ // todo
+ auto& pg = context<Machine>().pg;
+ logger().info("Primary <ActMap> {}", pg);
+ return discard_event();
+}
+
+void Primary::exit()
+{
+ auto& pg = context<Machine>().pg;
+ pg.clear_primary_state();
+ pg.clear_state(PG_STATE_CREATING);
+ logger().info("Leaving Primary: {}", pg);
+}
+
+/*---------Peering--------*/
+Peering::Peering(my_context ctx)
+ : my_base(ctx)
+{
+ auto& pg = context<Machine>().pg;
+ pg.set_state(PG_STATE_PEERING);
+ logger().info("Entering Peering");
+}
+
+boost::statechart::result Peering::react(const AdvMap& advmap)
+{
+ logger().info("Peering <AdvMap>");
+ context<Machine>().pg.update_need_up_thru(advmap.osdmap.get());
+ return forward_event();
+}
+
+void Peering::exit()
+{
+ auto& pg = context<Machine>().pg;
+ logger().info("Leaving Peering: {}", pg);
+ pg.clear_state(PG_STATE_PEERING);
+}
+
+/*--------GetInfo---------*/
+GetInfo::GetInfo(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering GetInfo");
+ context<Machine>().pg.update_need_up_thru();
+ post_event(GotInfo{});
+}
+
+boost::statechart::result GetInfo::react(const MNotifyRec& infoevt)
+{
+ logger().info("GetInfo <MNotifyRec>");
+ // todo: depends on get_infos()
+ post_event(GotInfo());
+ return discard_event();
+}
+
+void GetInfo::exit()
+{
+ logger().info("Leaving GetInfo");
+ // todo
+}
+
+/*------GetLog------------*/
+GetLog::GetLog(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering GetLog");
+ auto& pg = context<Machine>().pg;
+
+ PG::choose_acting_t adjust_acting;
+ tie(adjust_acting, auth_log_shard) = pg.choose_acting();
+ switch (adjust_acting) {
+ case PG::choose_acting_t::dont_change:
+ break;
+ case PG::choose_acting_t::should_change:
+ // post_event(NeedActingChange());
+ return;
+ case PG::choose_acting_t::pg_incomplete:
+ // post_event(IsIncomplete());
+ return;
+ }
+ // am i the best?
+ if (auth_log_shard == pg.get_whoami()) {
+ post_event(GotLog{});
+ return;
+ } else {
+ // todo: request log from peer
+ return;
+ }
+}
+
+boost::statechart::result GetLog::react(const AdvMap& advmap)
+{
+ // make sure our log source didn't go down. we need to check
+ // explicitly because it may not be part of the prior set, which
+ // means the Peering state check won't catch it going down.
+ if (!advmap.osdmap->is_up(auth_log_shard.osd)) {
+ logger().info("GetLog: auth_log_shard osd.{} went down",
+ auth_log_shard.osd);
+ post_event(advmap);
+ return transit<Reset>();
+ }
+
+ // let the Peering state do its checks.
+ return forward_event();
+}
+
+boost::statechart::result GetLog::react(const MLogRec& logevt)
+{
+ assert(!msg);
+ if (logevt.from != auth_log_shard) {
+ logger().info("GetLog: discarding log from non-auth_log_shard osd.{}",
+ logevt.from);
+ return discard_event();
+ }
+ logger().info("GetLog: received master log from osd.{}",
+ logevt.from);
+ msg = logevt.msg;
+ post_event(GotLog{});
+ return discard_event();
+}
+
+boost::statechart::result GetLog::react(const GotLog&)
+{
+ logger().info("leaving GetLog");
+ return transit<GetMissing>();
+}
+
+void GetLog::exit()
+{}
+
+Recovered::Recovered(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering Recovered");
+ if (context<Active>().all_replicas_activated) {
+ logger().info("all_replicas_activated");
+ post_event(GoClean{});
+ }
+}
+
+boost::statechart::result Recovered::react(const AllReplicasActivated&)
+{
+ logger().info("Recovered <AllReplicasActivated>");
+ post_event(GoClean{});
+ return forward_event();
+}
+
+void Recovered::exit()
+{
+ logger().info("Leaving Recovered");
+}
+
+Clean::Clean(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering Clean");
+ auto& pg = context<Machine>().pg;
+ pg.maybe_mark_clean();
+}
+
+void Clean::exit()
+{
+ logger().info("Leaving Clean");
+ auto& pg = context<Machine>().pg;
+ pg.clear_state(PG_STATE_CLEAN);
+}
+
+/*---------Active---------*/
+Active::Active(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering Activate");
+ auto& pg = context<Machine>().pg;
+ assert(pg.is_primary());
+ pg.activate(pg.get_osdmap_epoch());
+ logger().info("Activate Finished");
+}
+
+boost::statechart::result Active::react(const AdvMap& advmap)
+{
+ auto& pg = context<Machine>().pg;
+ if (pg.should_restart_peering(advmap.up_primary,
+ advmap.acting_primary,
+ advmap.new_up,
+ advmap.new_acting,
+ advmap.last_map,
+ advmap.osdmap)) {
+ logger().info("Active advmap interval change, fast return");
+ return forward_event();
+ }
+ logger().info("Active advmap");
+ return forward_event();
+}
+
+boost::statechart::result Active::react(const ActMap&)
+{
+ return forward_event();
+}
+
+boost::statechart::result Active::react(const MNotifyRec& notevt)
+{
+ logger().info("Active <MNotifyRec>");
+ auto& pg = context<Machine>().pg;
+ pg.proc_replica_info(notevt.from,
+ notevt.notify.info,
+ notevt.notify.epoch_sent);
+ return discard_event();
+}
+
+boost::statechart::result Active::react(const MInfoRec& infoevt)
+{
+ logger().info("Active <MInfoRec>");
+ auto& pg = context<Machine>().pg;
+ assert(pg.is_primary());
+
+ if (pg.is_last_activated_peer(infoevt.from)) {
+ post_event(AllReplicasActivated{});
+ }
+ return discard_event();
+}
+
+boost::statechart::result Active::react(const MLogRec& logevt)
+{
+ logger().info("Active <MLogRec>");
+ auto& pg = context<Machine>().pg;
+ pg.proc_replica_log(logevt.from,
+ logevt.msg->info,
+ logevt.msg->log,
+ logevt.msg->missing);
+ // todo
+ return discard_event();
+}
+
+boost::statechart::result Active::react(const AllReplicasActivated &evt)
+{
+ logger().info("Active <AllReplicasActivated>");
+ all_replicas_activated = true;
+ auto& pg = context<Machine>().pg;
+ pg.clear_state(PG_STATE_ACTIVATING);
+ pg.clear_state(PG_STATE_CREATING);
+ pg.on_activated();
+ pg.share_pg_info();
+ post_event(AllReplicasRecovered{});
+ return discard_event();
+}
+
+void Active::exit()
+{
+ auto& pg = context<Machine>().pg;
+ pg.clear_state(PG_STATE_ACTIVATING);
+ pg.clear_state(PG_STATE_DEGRADED);
+ pg.clear_state(PG_STATE_UNDERSIZED);
+
+ logger().info("Leaving Active");
+}
+
+/*------Activating--------*/
+Activating::Activating(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering Activating");
+}
+
+void Activating::exit()
+{
+ logger().info("Leaving Activating");
+}
+
+/*------ReplicaActive-----*/
+ReplicaActive::ReplicaActive(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering ReplicaActive");
+}
+
+
+boost::statechart::result ReplicaActive::react(const Activate& actevt)
+{
+ auto& pg = context<Machine>().pg;
+ logger().info("In ReplicaActive, about to call activate");
+ pg.activate(actevt.activation_epoch);
+ logger().info("Activate Finished");
+ return discard_event();
+}
+
+boost::statechart::result ReplicaActive::react(const MInfoRec& infoevt)
+{
+ return discard_event();
+}
+
+boost::statechart::result ReplicaActive::react(const MLogRec& logevt)
+{
+ return discard_event();
+}
+
+boost::statechart::result ReplicaActive::react(const ActMap&)
+{
+ auto& pg = context<Machine>().pg;
+ if (pg.should_send_notify()) {
+ context<Machine>().send_notify(pg.get_primary(),
+ pg.get_notify(pg.get_osdmap_epoch()),
+ pg.get_past_intervals());
+ };
+ return discard_event();
+}
+
+boost::statechart::result ReplicaActive::react(const MQuery& query)
+{
+ auto& pg = context<Machine>().pg;
+ context<Machine>().send_notify(query.from,
+ pg.get_notify(query.query_epoch),
+ pg.get_past_intervals());
+ return discard_event();
+}
+
+void ReplicaActive::exit()
+{}
+
+/*---RepNotRecovering----*/
+RepNotRecovering::RepNotRecovering(my_context ctx)
+ : my_base(ctx)
+{}
+
+void RepNotRecovering::exit()
+{}
+
+/*-------Stray---*/
+Stray::Stray(my_context ctx)
+ : my_base(ctx)
+{
+ auto& pg = context<Machine>().pg;
+ assert(!pg.is_primary());
+ logger().info("{}, Entering Stray", pg);
+}
+
+boost::statechart::result Stray::react(const MLogRec& logevt)
+{
+ auto& pg = context<Machine>().pg;
+ logger().info("{} Stray <MLogRec>", pg);
+ MOSDPGLog* msg = logevt.msg.get();
+ logger().info("got info+log from osd.{} {} {}",
+ logevt.from, msg->info, msg->log);
+ if (msg->info.last_backfill == hobject_t()) {
+ // todo: restart backfill
+ } else {
+ // todo: merge log
+ }
+ post_event(Activate{logevt.msg->info.last_epoch_started});
+ return transit<ReplicaActive>();
+}
+
+boost::statechart::result Stray::react(const MInfoRec& infoevt)
+{
+ auto& pg = context<Machine>().pg;
+ logger().info("{} Stray <MInfoRec>", pg);
+ logger().info("got info from osd.{} {}",
+ infoevt.from, infoevt.info);
+ // todo
+ post_event(Activate{infoevt.info.last_epoch_started});
+ return transit<ReplicaActive>();
+}
+
+boost::statechart::result Stray::react(const MQuery& query)
+{
+ auto& pg = context<Machine>().pg;
+ logger().info("{} Stray <MQuery>", pg);
+ pg.reply_pg_query(query, context<Machine>().get_context());
+ return discard_event();
+}
+
+boost::statechart::result Stray::react(const ActMap&)
+{
+ auto& pg = context<Machine>().pg;
+ logger().info("{} Stray <ActMap>", pg);
+ if (pg.should_send_notify()) {
+ context<Machine>().send_notify(pg.get_primary(),
+ pg.get_notify(pg.get_osdmap_epoch()),
+ pg.get_past_intervals());
+ }
+ return discard_event();
+}
+
+void Stray::exit()
+{
+ logger().info("Leaving Stray");
+}
+
+/*------Down--------*/
+Down::Down(my_context ctx)
+ : my_base(ctx)
+{}
+
+void Down::exit()
+{
+}
+
+boost::statechart::result Down::react(const MNotifyRec& infoevt)
+{
+ // todo
+ return discard_event();
+}
+
+/*------GetMissing--------*/
+GetMissing::GetMissing(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering GetMissing");
+ auto& pg = context<Machine>().pg;
+ if (pg.get_need_up_thru() != 0) {
+ logger().info("still need up_thru update before going active");
+ post_event(NeedUpThru{});
+ } else {
+ // all good!
+ post_event(Activate{pg.get_osdmap_epoch()});
+ }
+}
+
+boost::statechart::result GetMissing::react(const MLogRec& logevt)
+{
+ logger().info("GetMissing <MLogRec>");
+ auto& pg = context<Machine>().pg;
+ pg.proc_replica_log(logevt.from,
+ logevt.msg->info,
+ logevt.msg->log,
+ logevt.msg->missing);
+ if (pg.get_need_up_thru() != 0) {
+ logger().info(" still need up_thru update before going active");
+ post_event(NeedUpThru{});
+ } else {
+ logger().info("Got last missing, don't need missing posting Activate");
+ post_event(Activate{pg.get_osdmap_epoch()});
+ }
+ return discard_event();
+}
+
+void GetMissing::exit()
+{
+ logger().info("Leaving GetMissing");
+}
+
+/*------WaitUpThru--------*/
+WaitUpThru::WaitUpThru(my_context ctx)
+ : my_base(ctx)
+{
+ logger().info("Entering WaitUpThru");
+}
+
+boost::statechart::result WaitUpThru::react(const ActMap& am)
+{
+ logger().info("WaitUpThru <ActMap>");
+ auto& pg = context<Machine>().pg;
+ if (!pg.get_need_up_thru()) {
+ logger().info("WaitUpThru: no need up thru!");
+ post_event(Activate{pg.get_osdmap_epoch()});
+ }
+ return forward_event();
+}
+
+boost::statechart::result WaitUpThru::react(const MLogRec& logevt)
+{
+ logger().info("WaitUpThru: <MLogRec>");
+ auto& pg = context<Machine>().pg;
+ pg.proc_replica_log(logevt.from,
+ logevt.msg->info,
+ logevt.msg->log,
+ logevt.msg->missing);
+ return discard_event();
+}
+
+void WaitUpThru::exit()
+{
+ logger().info("Leaving WaitUpThru");
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <boost/statechart/custom_reaction.hpp>
+#include <boost/statechart/event.hpp>
+#include <boost/statechart/event_base.hpp>
+#include <boost/statechart/simple_state.hpp>
+#include <boost/statechart/state.hpp>
+#include <boost/statechart/transition.hpp>
+
+#include "recovery_machine.h"
+#include "recovery_events.h"
+
+// Initial
+// Reset
+// Start
+// Started
+// Primary
+// WaitActingChange
+// Peering
+// GetInfo
+// GetLog
+// GetMissing
+// WaitUpThru
+// Incomplete
+// Active
+// Activating
+// Clean
+// Recovered
+// Backfilling
+// WaitRemoteBackfillReserved
+// WaitLocalBackfillReserved
+// NotBackfilling
+// NotRecovering
+// Recovering
+// WaitRemoteRecoveryReserved
+// WaitLocalRecoveryReserved
+// ReplicaActive
+// RepNotRecovering
+// RepRecovering
+// RepWaitBackfillReserved
+// RepWaitRecoveryReserved
+// Stray
+// ToDelete
+// WaitDeleteReserved
+// Deleting
+// Crashed
+
+namespace recovery {
+
+struct Crashed : boost::statechart::state<Crashed, Machine> {
+ explicit Crashed(my_context ctx);
+};
+
+struct Reset;
+
+struct Initial : boost::statechart::state<Initial, Machine> {
+ explicit Initial(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::transition<Initialize, Reset>,
+ boost::statechart::custom_reaction<NullEvt>,
+ boost::statechart::transition<boost::statechart::event_base, Crashed>
+ >;
+
+ boost::statechart::result react(const MNotifyRec&);
+ boost::statechart::result react(const MInfoRec&);
+ boost::statechart::result react(const MLogRec&);
+ boost::statechart::result react(const boost::statechart::event_base&) {
+ return discard_event();
+ }
+};
+
+struct Reset : boost::statechart::state<Reset, Machine> {
+ explicit Reset(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction<AdvMap>,
+ boost::statechart::custom_reaction<ActMap>,
+ boost::statechart::custom_reaction<NullEvt>,
+ boost::statechart::transition<boost::statechart::event_base, Crashed>
+ >;
+ boost::statechart::result react(const AdvMap&);
+ boost::statechart::result react(const ActMap&);
+ boost::statechart::result react(const boost::statechart::event_base&) {
+ return discard_event();
+ }
+};
+
+struct Start;
+
+struct Started : boost::statechart::state<Started, Machine, Start> {
+ explicit Started(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction<AdvMap>,
+ // ignored
+ boost::statechart::custom_reaction<NullEvt>,
+ // crash
+ boost::statechart::transition<boost::statechart::event_base, Crashed>
+ >;
+ boost::statechart::result react(const AdvMap&);
+ boost::statechart::result react(const boost::statechart::event_base&) {
+ return discard_event();
+ }
+};
+
+struct Primary;
+struct Stray;
+
+struct MakePrimary : boost::statechart::event<MakePrimary> {};
+struct MakeStray : boost::statechart::event<MakeStray> {};
+
+struct Start : boost::statechart::state<Start, Started> {
+ explicit Start(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::transition<MakePrimary, Primary>,
+ boost::statechart::transition<MakeStray, Stray>
+ >;
+};
+
+struct Peering;
+struct WaitActingChange;
+
+struct Primary : boost::statechart::state<Primary, Started, Peering> {
+ explicit Primary(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction<ActMap>,
+ boost::statechart::custom_reaction<MNotifyRec>
+ >;
+ boost::statechart::result react(const ActMap&);
+ boost::statechart::result react(const MNotifyRec&);
+};
+
+struct GetInfo;
+struct Active;
+
+struct Peering : boost::statechart::state<Peering, Primary, GetInfo> {
+ PastIntervals::PriorSet prior_set;
+ /// need osd_find_best_info_ignore_history_les
+ bool history_les_bound = false;
+
+ explicit Peering(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::transition<Activate, Active>,
+ boost::statechart::custom_reaction<AdvMap>
+ >;
+ boost::statechart::result react(const AdvMap &advmap);
+};
+
+struct Activating;
+
+struct AllReplicasActivated : boost::statechart::event<AllReplicasActivated> {};
+
+struct Active : boost::statechart::state<Active, Primary, Activating> {
+ explicit Active(my_context ctx);
+ void exit();
+
+ bool all_replicas_activated = false;
+
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction< ActMap >,
+ boost::statechart::custom_reaction< AdvMap >,
+ boost::statechart::custom_reaction< MInfoRec >,
+ boost::statechart::custom_reaction< MNotifyRec >,
+ boost::statechart::custom_reaction< MLogRec >,
+ boost::statechart::custom_reaction< AllReplicasActivated >
+ >;
+ boost::statechart::result react(const ActMap&);
+ boost::statechart::result react(const AdvMap&);
+ boost::statechart::result react(const MInfoRec& infoevt);
+ boost::statechart::result react(const MNotifyRec& notevt);
+ boost::statechart::result react(const MLogRec& logevt);
+ boost::statechart::result react(const AllReplicasActivated&);
+};
+
+struct GoClean : boost::statechart::event<GoClean> {};
+
+struct Clean : boost::statechart::state<Clean, Active> {
+ explicit Clean(my_context ctx);
+ void exit();
+ boost::statechart::result react(const boost::statechart::event_base&) {
+ return discard_event();
+ }
+};
+
+struct Recovered : boost::statechart::state<Recovered, Active> {
+ using reactions = boost::mpl::list<
+ boost::statechart::transition<GoClean, Clean>,
+ boost::statechart::custom_reaction<AllReplicasActivated>
+ >;
+ explicit Recovered(my_context ctx);
+ void exit();
+ boost::statechart::result react(const AllReplicasActivated&);
+};
+
+struct AllReplicasRecovered : boost::statechart::event<AllReplicasRecovered>
+{};
+
+struct Activating : boost::statechart::state<Activating, Active> {
+ using reactions = boost::mpl::list <
+ boost::statechart::transition< AllReplicasRecovered, Recovered >
+ >;
+ explicit Activating(my_context ctx);
+ void exit();
+};
+
+struct RepNotRecovering;
+
+struct ReplicaActive : boost::statechart::state<ReplicaActive, Started, RepNotRecovering> {
+ explicit ReplicaActive(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction<ActMap>,
+ boost::statechart::custom_reaction<MQuery>,
+ boost::statechart::custom_reaction<MInfoRec>,
+ boost::statechart::custom_reaction<MLogRec>,
+ boost::statechart::custom_reaction<Activate>
+ >;
+ boost::statechart::result react(const MInfoRec& infoevt);
+ boost::statechart::result react(const MLogRec& logevt);
+ boost::statechart::result react(const ActMap&);
+ boost::statechart::result react(const MQuery&);
+ boost::statechart::result react(const Activate&);
+};
+
+struct RepNotRecovering : boost::statechart::state<RepNotRecovering, ReplicaActive> {
+ explicit RepNotRecovering(my_context ctx);
+ void exit();
+};
+
+struct Stray : boost::statechart::state<Stray, Started> {
+ explicit Stray(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction<MQuery>,
+ boost::statechart::custom_reaction<MLogRec>,
+ boost::statechart::custom_reaction<MInfoRec>,
+ boost::statechart::custom_reaction<ActMap>
+ >;
+ boost::statechart::result react(const MQuery& query);
+ boost::statechart::result react(const MLogRec& logevt);
+ boost::statechart::result react(const MInfoRec& infoevt);
+ boost::statechart::result react(const ActMap&);
+};
+
+struct GetLog;
+struct Down;
+
+struct GotInfo : boost::statechart::event<GotInfo> {};
+struct IsDown : boost::statechart::event<IsDown> {};
+
+struct GetInfo : boost::statechart::state<GetInfo, Peering> {
+ std::set<pg_shard_t> peer_info_requested;
+
+ explicit GetInfo(my_context ctx);
+ void exit();
+ void get_infos();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::transition<GotInfo, GetLog>,
+ boost::statechart::custom_reaction<MNotifyRec>,
+ boost::statechart::transition<IsDown, Down>
+ >;
+ boost::statechart::result react(const MNotifyRec& infoevt);
+};
+
+struct GotLog : boost::statechart::event<GotLog> {};
+
+struct GetLog : boost::statechart::state<GetLog, Peering> {
+ pg_shard_t auth_log_shard;
+ boost::intrusive_ptr<MOSDPGLog> msg;
+
+ explicit GetLog(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction<MLogRec>,
+ boost::statechart::custom_reaction<GotLog>,
+ boost::statechart::custom_reaction<AdvMap>
+ >;
+ boost::statechart::result react(const AdvMap&);
+ boost::statechart::result react(const MLogRec& logevt);
+ boost::statechart::result react(const GotLog&);
+};
+
+struct NeedUpThru : boost::statechart::event<NeedUpThru> {};
+struct WaitUpThru;
+
+struct GetMissing : boost::statechart::state<GetMissing, Peering> {
+ explicit GetMissing(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction<MLogRec>,
+ boost::statechart::transition<NeedUpThru, WaitUpThru>
+ >;
+ boost::statechart::result react(const MLogRec& logevt);
+};
+
+struct WaitUpThru : boost::statechart::state<WaitUpThru, Peering> {
+ explicit WaitUpThru(my_context ctx);
+ void exit();
+
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction<ActMap>,
+ boost::statechart::custom_reaction<MLogRec>
+ >;
+ boost::statechart::result react(const ActMap& am);
+ boost::statechart::result react(const MLogRec& logrec);
+};
+
+struct Down : boost::statechart::state<Down, Peering> {
+ explicit Down(my_context ctx);
+ using reactions = boost::mpl::list <
+ boost::statechart::custom_reaction<MNotifyRec>
+ >;
+ boost::statechart::result react(const MNotifyRec& infoevt);
+ void exit();
+};
+
+}