From: Greg Farnum Date: Mon, 24 Jun 2019 20:33:21 +0000 (-0700) Subject: elector: Update Elector and ElectionLogic function documentation X-Git-Tag: v15.1.0~1791^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=521a622ea9357b17800e1847891a7c5d4e66a58a;p=ceph.git elector: Update Elector and ElectionLogic function documentation The world has changed, and now so have the nice docs. Signed-off-by: Greg Farnum --- diff --git a/src/mon/ElectionLogic.h b/src/mon/ElectionLogic.h index f5451194b92..0d60115a6c8 100644 --- a/src/mon/ElectionLogic.h +++ b/src/mon/ElectionLogic.h @@ -21,50 +21,328 @@ class ElectionOwner { public: + /** + * Write down the given epoch in persistent storage, such that it + * can later be retrieved by read_persisted_epoch even across process + * or machine restarts. + * + * @param e The epoch to write + */ virtual void persist_epoch(epoch_t e) = 0; + /** + * Retrieve the most-previously-persisted epoch. + * + * @returns The latest epoch passed to persist_epoch() + */ virtual epoch_t read_persisted_epoch() = 0; + /** + * Validate that the persistent store is working by committing + * to it. (There is no interface for retrieving the value; this + * tests local functionality before doing things like triggering + * elections to try and join a quorum.) + */ virtual void validate_store() = 0; + /** + * Notify the ElectionOwner that ElectionLogic has increased its + * election epoch. This resets an election (either on local loss or victory, + * or when trying a new election round) and the ElectionOwner + * should reset any tracking of its own to match. (The ElectionLogic + * will further trigger sending election messages if that is + * appropriate.) + */ virtual void notify_bump_epoch() = 0; + /** + * Notify the ElectionOwner we must start a new election. + */ virtual void trigger_new_election() = 0; + /** + * Retrieve this Paxos instance's rank. 
+ */ virtual int get_my_rank() = 0; + /** + * Send a PROPOSE message to all our peers. This happens when + * we have started a new election (which may mean attempting to + * override a current one). + * + * @param e The election epoch of our proposal. + */ virtual void propose_to_peers(epoch_t e) = 0; + /** + * The election has failed and we aren't sure what the state of the + * quorum is, so reset the entire system as if from scratch. + */ virtual void reset_election() = 0; + /** + * Ask the ElectionOwner if we-the-Monitor have ever participated in the + * quorum (including across process restarts!). + * + * @returns true if we have participated, false otherwise + */ virtual bool ever_participated() = 0; + /** + * Ask the ElectionOwner for the size of the Paxos set. This includes + * those monitors which may not be in the current quorum! + */ virtual unsigned paxos_size() = 0; + /** + * Tell the ElectionOwner we have started a new election. + * + * The ElectionOwner is responsible for timing out the election (by invoking + * end_election_period()) if it takes too long (as defined by the ElectionOwner). + * This function is the opportunity to do that and to clean up any other external + * election state it may be maintaining. + */ virtual void _start() = 0; + /** + * Tell the ElectionOwner to defer to the identified peer. Tell that peer + * we have deferred to it. + * + * @post we sent an ack message to @p who + */ virtual void _defer_to(int who) = 0; + /** + * We have won an election, so have the ElectionOwner message that to + * our new quorum! + * + * @param quorum The ranks of our peers which deferred to us and + * must be told of our victory + */ virtual void message_victory(const set& quorum) = 0; - virtual bool is_current_member(int ) = 0; + /** + * Query the ElectionOwner about if a given rank is in the + * currently active quorum. 
+ * @param rank the Paxos rank whose status we are checking + * @returns true if the rank is in our current quorum, false otherwise. + */ + virtual bool is_current_member(int rank) = 0; virtual ~ElectionOwner() {} }; +/** + * This class maintains local state for running an election + * between Paxos instances. It receives input requests + * and calls back out to its ElectionOwner to do persistence + * and message other entities. + */ + class ElectionLogic { ElectionOwner *elector; CephContext *cct; + /** + * Latest epoch we've seen. + * + * @remarks if its value is odd, we're electing; if it's even, then we're + * stable. + */ epoch_t epoch = 0; + /** + * Indicates who we have acked + */ int leader_acked; public: + /** + * Indicates if we are participating in the quorum. + * + * @remarks By default, we are created as participating. We may stop + * participating if something explicitly sets our value + * false, though. If that happens, it will + * have to set participating=true and invoke start() for us to resume + * participating in the quorum. + */ bool participating; + /** + * Indicates if we are the ones being elected. + * + * We always attempt to be the one being elected if we are the ones starting + * the election. If we are not the ones that started it, we will only attempt + * to be elected if we think we might have a chance (i.e., the other guy's + * rank is lower than ours). + */ bool electing_me; + /** + * Set containing all those that acked our proposal to become the Leader. + * + * If we are acked by ElectionOwner::paxos_size() peers, we will declare + * victory. + */ set acked_me; ElectionLogic(ElectionOwner *e, CephContext *c) : elector(e), cct(c), leader_acked(-1), participating(true), electing_me(false) {} + /** + * If there are no other peers in this Paxos group, ElectionOwner + * can simply declare victory and we will make it so. 
+ * + * @pre paxos_size() is 1 + * @pre get_my_rank is 0 + */ void declare_standalone_victory(); + /** + * Start a new election by proposing ourselves as the new Leader. + * + * Basically, send propose messages to all the peers. + * + * @pre participating is true + * @post epoch is an odd value + * @post electing_me is true + * @post We have invoked propose_to_peers() on our ElectionOwner + * @post We have invoked _start() on our ElectionOwner + */ void start(); + /** + * ElectionOwner has decided the election has taken too long and expired. + * + * This will happen when no one declared victory or started a new election + * during the allowed time span. + * + * When the election expires, we will check if we were the ones who won, and + * if so we will declare victory. If that is not the case, then we assume + * that the one we deferred to didn't declare victory quickly enough (in fact, + * as far as we know, it may even be dead); so, just propose ourselves as the + * Leader. + */ void end_election_period(); + /** + * Handle a proposal from some other node asking to become + * the Leader. + * + * If the message appears to be old (i.e., its epoch is lower than our epoch), + * then we may take one of two actions: + * + * @li Ignore it because it's nothing more than an old proposal + * @li Start new elections if we verify that it was sent by a monitor from + * outside the quorum; given its old state, it's fair to assume it just + * started, so we should start new elections so it may rejoin + * + * If we did not ignore the received message, then we know that this message + * was sent by some other node proposing itself to become the Leader. 
So, we + * will take one of the following actions: + * + * @li Ignore it because we already acked another node with higher rank + * @li Ignore it and start a new election because we outrank it + * @li Defer to it because it outranks us and the node we previously + * acked, if any + * + * @pre Message epoch is from the current or a newer epoch + * @param mepoch The epoch of the proposal + * @param from The rank proposing itself as leader + */ void receive_propose(epoch_t mepoch, int from); + /** + * Handle a message from some other participant Acking us as the Leader. + * + * When we receive such a message, one of three things may be happening: + * @li We received a message with a newer epoch, which means we must have + * somehow lost track of what was going on (maybe we rebooted), thus we + * will start a new election + * @li We consider ourselves in the run for the Leader (i.e., @p electing_me + * is true), and we are actually being Acked by someone; thus simply add + * the one acking us to the @p acked_me set. If we do now have acks from + * all the participants, then we can declare victory + * @li We already deferred the election to somebody else, so we will just + * ignore this message + * + * @pre Message epoch is from the current or a newer epoch + * @post Election is on-going if we deferred to somebody else + * @post Election is on-going if we are still waiting for further Acks + * @post Election is not on-going if we are victorious + * @post Election is not on-going if we must start a new one + * + * @param from The rank which acked us + * @param from_epoch The election epoch the ack belongs to + */ void receive_ack(int from, epoch_t from_epoch); + /** + * Handle a message from some other participant declaring Victory. + * + * We just got a message from someone declaring themselves Victorious, thus + * the new Leader. 
+ * + * However, if the message's epoch happens to be different from our epoch+1, + * then it means we lost track of something and we must start a new election. + * + * If that is not the case, then we will simply update our epoch to the one + * in the message and invoke start() to reset the quorum. + * + * @pre from_epoch is the current or a newer epoch + * @post Election is not on-going + * @post Updated @p epoch + * @post We are a peon in a new quorum if we lost the election + * + * @param from The victory-claiming rank + * @param from_epoch The election epoch in which they claim victory + */ bool receive_victory_claim(int from, epoch_t from_epoch); + /** + * Obtain our epoch + * + * @returns Our current epoch number + */ epoch_t get_epoch() { return epoch; } private: + /** + * Initiate the ElectionLogic class. + * + * Basically, we will simply read whatever epoch value we have in our stable + * storage, or consider it to be 1 if none is read. + * + * @post @p epoch is set to 1 or higher. + */ void init(); + /** + * Update our epoch. + * + * If we come across a higher epoch, we simply update ours, also making + * sure we are no longer being elected (even though we could have been, + * we no longer are since we no longer are on that old epoch). + * + * @pre Our epoch is not larger than @p e + * @post Our epoch equals @p e + * + * @param e Epoch to which we will update our epoch + */ void bump_epoch(epoch_t e); + /** + * Defer the current election to some other monitor. + * + * This means that we will ack some other monitor and drop out from the run + * to become the Leader. We will only defer an election if the monitor we + * are deferring to outranks us. 
+ * + * @pre @p who outranks us (i.e., who < our rank) + * @pre @p who outranks any other monitor we have deferred to in the past + * @post electing_me is false + * @post leader_acked equals @p who + * @post we triggered ElectionOwner's _defer_to() on @p who + * + * @param who Some other monitor's numeric identifier. + */ void defer(int who); + /** + * Declare Victory. + * + * We won. Or at least we believe we won, but for all intents and purposes + * that does not matter. What matters is that we Won. + * + * That said, we must now bump our epoch to reflect that the election is over + * and then we must let everybody in the quorum know we are their brand new + * Leader. + * + * Actually, the quorum will be now defined as the group of monitors that + * acked us during the election process. + * + * @pre Election is on-going + * @pre electing_me is true + * @post electing_me is false + * @post epoch is bumped up into an even value + * @post Election is not on-going + * @post We have a quorum, composed of the monitors that acked us + * @post We invoked message_victory() on the ElectionOwner + */ void declare_victory(); }; diff --git a/src/mon/Elector.h b/src/mon/Elector.h index e6c39d35d6d..b2762dd3bfb 100644 --- a/src/mon/Elector.h +++ b/src/mon/Elector.h @@ -27,7 +27,8 @@ class Monitor; /** - * This class is responsible for maintaining the local state when electing + * This class is responsible for handling messages and maintaining + * an ElectionLogic which holds the local state when electing * a new Leader. We may win or we may lose. If we win, it means we became the * Leader; if we lose, it means we are a Peon. */ @@ -90,57 +91,16 @@ class Elector : public ElectionOwner { */ void cancel_timer(); - /** - * Latest epoch we've seen. - * - * @remarks if its value is odd, we're electing; if it's even, then we're - * stable. - */ - //epoch_t epoch; - - /** - * Indicates if we are participating in the quorum. - * - * @remarks By default, we are created as participating. 
We may stop - * participating if the Monitor explicitly calls - * Elector::stop_participating though. If that happens, it will - * have to call Elector::start_participating for us to resume - * participating in the quorum. - */ - // bool participating; - // electing me /** * @defgroup Elector_h_electing_me_vars We are being elected * @{ */ /** - * Indicates if we are the ones being elected. - * - * We always attempt to be the one being elected if we are the ones starting - * the election. If we are not the ones that started it, we will only attempt - * to be elected if we think we might have a chance (i.e., the other guy's - * rank is lower than ours). - */ - // bool electing_me; - /** - * Set containing all those that acked our proposal to become the Leader. - * - * If we are acked by everyone in the MonMap, we will declare - * victory. Also note each peer's feature set. + * Map containing info of all those that acked our proposal to become the Leader. + * Note each peer's info. */ map peer_info; - /** - * @} - */ - /** - * @defgroup Elector_h_electing_them_vars We are electing another guy - * @{ - */ - /** - * Indicates who we have acked - */ - // int leader_acked; /** * @} */ @@ -149,48 +109,24 @@ class Elector : public ElectionOwner { * Handle a message from some other node proposing itself to become it * the Leader. * - * If the message appears to be old (i.e., its epoch is lower than our epoch), - * then we may take one of two actions: - * - * @li Ignore it because it's nothing more than an old proposal - * @li Start new elections if we verify that it was sent by a monitor from - * outside the quorum; given its old state, it's fair to assume it just - * started, so we should start new elections so it may rejoin - * - * If we did not ignore the received message, then we know that this message - * was sent by some other node proposing itself to become the Leader. 
So, we - * will take one of the following actions: - * - * @li Ignore it because we already acked another node with higher rank - * @li Ignore it and start a new election because we outrank it - * @li Defer to it because it outranks us and the node we previously - * acked, if any - * + * We validate that the sending Monitor is allowed to participate based on + * its supported features, then pass the request to our ElectionLogic. * * @invariant The received message is an operation of type OP_PROPOSE * + * @pre Message epoch is from the current or a newer epoch + * * @param m A message sent by another participant in the quorum. */ void handle_propose(MonOpRequestRef op); /** * Handle a message from some other participant Acking us as the Leader. * - * When we receive such a message, one of three thing may be happening: - * @li We received a message with a newer epoch, which means we must have - * somehow lost track of what was going on (maybe we rebooted), thus we - * will start a new election - * @li We consider ourselves in the run for the Leader (i.e., @p electing_me - * is true), and we are actually being Acked by someone; thus simply add - * the one acking us to the @p acked_me set. If we do now have acks from - * all the participants, then we can declare victory - * @li We already deferred the election to somebody else, so we will just - * ignore this message - * - * @pre Election is on-going - * @post Election is on-going if we deferred to somebody else - * @post Election is on-going if we are still waiting for further Acks - * @post Election is not on-going if we are victorious - * @post Election is not on-going if we must start a new one + * We validate that the sending Monitor is allowed to participate based on + * its supported features, add it to peer_info, and pass the ack to our + * ElectionLogic. 
+ * + * @pre Message epoch is from the current or a newer epoch * * @param m A message with an operation type of OP_ACK */ @@ -201,14 +137,11 @@ class Elector : public ElectionOwner { * We just got a message from someone declaring themselves Victorious, thus * the new Leader. * - * However, if the message's epoch happens to be different from our epoch+1, - * then it means we lost track of something and we must start a new election. - * - * If that is not the case, then we will simply update our epoch to the one - * in the message, cancel our @p expire_event timer and inform our Monitor - * that we lost the election and provide it with the new quorum. + * We pass the Victory to our ElectionLogic, and if it confirms the + * victory we lose the election and start following this Leader. Otherwise, + * drop the message. * - * @pre Election in on-going + * @pre Message epoch is from the current or a newer epoch * @post Election is not on-going * @post Updated @p epoch * @post We have a new quorum if we lost the election @@ -244,44 +177,41 @@ class Elector : public ElectionOwner { public: /** - * Update our epoch. - * - * If we come across a higher epoch, we simply update ours, also making - * sure we are no longer being elected (even though we could have been, - * we no longer are since we no longer are on that old epoch). - * - * @pre Our epoch is lower than @p e - * @post Our epoch equals @p e - * - * @param e Epoch to which we will update our epoch + * @defgroup Elector_h_ElectionOwner Functions from the ElectionOwner interface + * @{ */ + /* Commit the given epoch to our MonStore */ + void persist_epoch(epoch_t e); + /* Read the epoch out of our MonStore */ + epoch_t read_persisted_epoch(); + /* Write a nonsense key "election_writeable_test" to our MonStore */ + void validate_store(); + /* Reset my tracking. 
Currently, just call Monitor::join_election() */ void notify_bump_epoch(); - + /* Call a new election: Invoke Monitor::start_election() */ + void trigger_new_election(); + /* Retrieve rank from the Monitor */ + int get_my_rank(); + /* Send MMonElection OP_PROPOSE to every monitor in the map. */ + void propose_to_peers(epoch_t e); + /* bootstrap() the Monitor */ + void reset_election(); + /* Retrieve the Monitor::has_ever_joined member */ + bool ever_participated(); + /* Retrieve monmap->size() */ + unsigned paxos_size(); /** - * Start new elections by proposing ourselves as the new Leader. + * Reset the expire_event timer so we can limit the amount of time we + * will be electing. Clean up our peer_info. * - * Basically, send propose messages to all the monitors in the MonMap and - * then reset the expire_event timer so we can limit the amount of time we - * will be going at it. - * - * @pre participating is true - * @post epoch is an odd value - * @post electing_me is true - * @post we sent propose messages to all the monitors in the MonMap * @post we reset the expire_event timer */ void _start(); /** - * Defer the current election to some other monitor. - * - * This means that we will ack some other monitor and drop out from the run - * to become the Leader. We will only defer an election if the monitor we - * are deferring to outranks us. + * Send an MMonElection message deferring to the identified monitor. We + * also increase the election timeout so the monitor we defer to + * has some time to gather deferrals and actually win. (FIXME: necessary to protocol?) 
* - * @pre @p who outranks us (i.e., who < our rank) - * @pre @p who outranks any other monitor we have deferred to in the past - * @post electing_me is false - * @post leader_acked equals @p who * @post we sent an ack message to @p who * @post we reset the expire_event timer * @@ -289,53 +219,17 @@ class Elector : public ElectionOwner { */ void _defer_to(int who); /** - * The election has taken too long and has expired. - * - * This will happen when no one declared victory or started a new election - * during the time span allowed by the expire_event timer. - * - * When the election expires, we will check if we were the ones who won, and - * if so we will declare victory. If that is not the case, then we assume - * that the one we deferred to didn't declare victory quickly enough (in fact, - * as far as we know, we may even be dead); so, just propose ourselves as the - * Leader. - */ - // void expire(); - /** - * Declare Victory. - * - * We won. Or at least we believe we won, but for all intentions and purposes - * that does not matter. What matters is that we Won. - * - * That said, we must now bump our epoch to reflect that the election is over - * and then we must let everybody in the quorum know we are their brand new - * Leader. And we will also cancel our expire_event timer. - * - * Actually, the quorum will be now defined as the group of monitors that - * acked us during the election process. - * - * @pre Election is on-going - * @pre electing_me is true - * @post electing_me is false - * @post epoch is bumped up into an even value - * @post Election is not on-going - * @post We have a quorum, composed of the monitors that acked us - * @post We sent a message of type OP_VICTORY to each quorum member. + * Our ElectionLogic told us we won an election! Identify the quorum + * features, tell our new peons we've won, and invoke Monitor::win_election(). 
+ */ void message_victory(const set& quorum); - - void persist_epoch(epoch_t e); - epoch_t read_persisted_epoch(); - void validate_store(); - void trigger_new_election(); - int get_my_rank(); - void propose_to_peers(epoch_t e); - void reset_election(); - bool ever_participated(); - unsigned paxos_size(); + /* Check if rank is in mon->quorum */ + bool is_current_member(int rank); + /** + * @} + */ Elector *elector; - bool is_current_member(int rank); /** * Create an Elector class @@ -345,15 +239,6 @@ class Elector : public ElectionOwner { explicit Elector(Monitor *m); virtual ~Elector() {} - /** - * Initiate the Elector class. - * - * Basically, we will simply read whatever epoch value we have in our stable - * storage, or consider it to be 1 if none is read. - * - * @post @p epoch is set to 1 or higher. - */ - // void init(); /** * Inform this class it is supposed to shutdown. * @@ -364,12 +249,16 @@ class Elector : public ElectionOwner { void shutdown(); /** - * Obtain our epoch + * Obtain our epoch from ElectionLogic. * * @returns Our current epoch number */ epoch_t get_epoch() { return logic.get_epoch(); } + /** + * If the Monitor knows there are no Paxos peers (so + * we are rank 0 and there are no others) we can declare victory. + */ void declare_standalone_victory() { logic.declare_standalone_victory(); } @@ -389,7 +278,7 @@ class Elector : public ElectionOwner { /** * Call an election. * - * This function simply calls Elector::start. + * This function simply calls ElectionLogic::start. */ void call_election() { logic.start();