From f3f05b996237fe82d8bea28c42c8baef0b5bf943 Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Mon, 4 Dec 2017 01:29:48 -0500 Subject: [PATCH] rbd-mirror: rollback state transitions in image policy Introduce an error path in state transitions for reinitiating image mapping possibly to a new peer. Also, a minor cleanup in remove_instances() to check for pending actions if any (rather than scanning the actions list for possible remove action). Signed-off-by: Venky Shankar --- src/test/rbd_mirror/test_mock_ImageMap.cc | 36 +++--- src/tools/rbd_mirror/ImageMap.cc | 8 +- src/tools/rbd_mirror/ImageMap.h | 19 +++ src/tools/rbd_mirror/image_map/Action.cc | 10 +- src/tools/rbd_mirror/image_map/Action.h | 2 +- src/tools/rbd_mirror/image_map/Policy.cc | 115 +++++++++--------- src/tools/rbd_mirror/image_map/Policy.h | 24 +++- .../rbd_mirror/image_map/StateTransition.cc | 18 +-- .../rbd_mirror/image_map/StateTransition.h | 20 ++- 9 files changed, 147 insertions(+), 105 deletions(-) diff --git a/src/test/rbd_mirror/test_mock_ImageMap.cc b/src/test/rbd_mirror/test_mock_ImageMap.cc index 2514c686b4a..090fb94f74c 100644 --- a/src/test/rbd_mirror/test_mock_ImageMap.cc +++ b/src/test/rbd_mirror/test_mock_ImageMap.cc @@ -1029,7 +1029,7 @@ TEST_F(TestMockImageMap, RemoveInstanceWithRemoveImage) { ASSERT_EQ(0, when_shut_down(mock_image_map.get())); } -TEST_F(TestMockImageMap, AddErrorWithRetryAndResume) { +TEST_F(TestMockImageMap, AddErrorAndRemoveImage) { MockThreads mock_threads(m_threads); expect_work_queue(mock_threads); @@ -1052,11 +1052,6 @@ TEST_F(TestMockImageMap, AddErrorWithRetryAndResume) { }; std::set global_image_ids_ack(global_image_ids); - std::set remove_global_image_ids{ - "global id 1" - }; - std::set remove_global_image_ids_ack(remove_global_image_ids); - // UPDATE_MAPPING+ACQUIRE expect_add_event(mock_threads); MockUpdateRequest mock_update_request; @@ -1088,22 +1083,29 @@ TEST_F(TestMockImageMap, AddErrorWithRetryAndResume) { mock_listener, shuffled_global_image_ids, 0); remote_peer_ack_listener_wait(mock_image_map.get(), shuffled_global_image_ids, 0); + mock_image_map->update_instances_removed({"9876"}); + // instance blacklisted -- ACQUIRE request fails - remote_peer_ack_nowait(mock_image_map.get(), shuffled_global_image_ids, -EBLACKLISTED); + update_map_and_acquire(mock_threads, mock_update_request, + mock_listener, shuffled_global_image_ids, 0); + remote_peer_ack_listener_wait(mock_image_map.get(), shuffled_global_image_ids, -EBLACKLISTED); - expect_add_event(mock_threads); - expect_update_request(mock_update_request, 0); - expect_add_event(mock_threads); - listener_acquire_images(mock_listener, shuffled_global_image_ids); + // new peer acks acquire request + remote_peer_ack_nowait(mock_image_map.get(), shuffled_global_image_ids, 0); + wait_for_scheduled_task(); - // remove the dead instance - mock_image_map->update_instances_removed({"9876"}); + std::set shuffled_global_image_ids_ack(shuffled_global_image_ids); - ASSERT_TRUE(wait_for_map_update(1)); - ASSERT_TRUE(wait_for_listener_notify(shuffled_global_image_ids.size())); + // remove image + listener_remove_images(mock_listener, "uuid1", shuffled_global_image_ids); + expect_add_event(mock_threads); + listener_release_images(mock_listener, shuffled_global_image_ids); + update_map_request(mock_threads, mock_update_request, shuffled_global_image_ids, 0); - // remote peer ACKs image acquire request - remote_peer_ack_nowait(mock_image_map.get(), shuffled_global_image_ids, 0); + mock_image_map->update_images("uuid1", {}, std::move(shuffled_global_image_ids)); + ASSERT_TRUE(wait_for_listener_notify(shuffled_global_image_ids_ack.size() * 2)); + + remote_peer_ack_wait(mock_image_map.get(), shuffled_global_image_ids_ack, 0); wait_for_scheduled_task(); ASSERT_EQ(0, when_shut_down(mock_image_map.get())); diff --git a/src/tools/rbd_mirror/ImageMap.cc b/src/tools/rbd_mirror/ImageMap.cc index 48f34cf3dcf..4e5b851360f 100644 --- a/src/tools/rbd_mirror/ImageMap.cc +++ b/src/tools/rbd_mirror/ImageMap.cc @@ -324,9 +324,7 @@ void ImageMap::schedule_add_action(const std::string &global_image_id) { // in order of state-machine execution, so its easier to follow Context *on_update = new C_UpdateMap(this, global_image_id); - Context *on_acquire = new FunctionContext([this, global_image_id](int r) { - queue_acquire_image(global_image_id); - }); + Context *on_acquire = new C_AcquireImage(this, global_image_id); Context *on_finish = new FunctionContext([this, global_image_id](int r) { handle_add_action(global_image_id, r); }); @@ -365,9 +363,7 @@ void ImageMap::schedule_shuffle_action(const std::string &global_image_id) { queue_release_image(global_image_id); }); Context *on_update = new C_UpdateMap(this, global_image_id); - Context *on_acquire = new FunctionContext([this, global_image_id](int r) { - queue_acquire_image(global_image_id); - }); + Context *on_acquire = new C_AcquireImage(this, global_image_id); Context *on_finish = new FunctionContext([this, global_image_id](int r) { handle_shuffle_action(global_image_id, r); }); diff --git a/src/tools/rbd_mirror/ImageMap.h b/src/tools/rbd_mirror/ImageMap.h index b1fb891c8e3..a10b8406051 100644 --- a/src/tools/rbd_mirror/ImageMap.h +++ b/src/tools/rbd_mirror/ImageMap.h @@ -155,6 +155,25 @@ private: } }; + struct C_AcquireImage : Context { + ImageMap *image_map; + std::string global_image_id; + + C_AcquireImage(ImageMap *image_map, const std::string &global_image_id) + : image_map(image_map), + global_image_id(global_image_id) { + } + + void finish(int r) override { + image_map->queue_acquire_image(global_image_id); + } + + // maybe called more than once + void complete(int r) override { + finish(r); + } + }; + // async op-tracker helper routines void start_async_op() { m_async_op_tracker.start_op(); diff --git a/src/tools/rbd_mirror/image_map/Action.cc b/src/tools/rbd_mirror/image_map/Action.cc index 8fd19e8ef87..12055d06198 100644 --- a/src/tools/rbd_mirror/image_map/Action.cc +++ b/src/tools/rbd_mirror/image_map/Action.cc @@ -58,16 +58,10 @@ void Action::execute_state_callback(StateTransition::State state) { } } -void Action::state_callback_complete(StateTransition::State state, bool delete_context) { - Context *on_state = nullptr; - +void Action::state_callback_complete(StateTransition::State state) { auto it = context_map.find(state); if (it != context_map.end()) { - std::swap(it->second, on_state); - } - - if (on_state && delete_context) { - delete on_state; + it->second = nullptr; } } diff --git a/src/tools/rbd_mirror/image_map/Action.h b/src/tools/rbd_mirror/image_map/Action.h index ba9c2c29763..6414f66517a 100644 --- a/src/tools/rbd_mirror/image_map/Action.h +++ b/src/tools/rbd_mirror/image_map/Action.h @@ -21,7 +21,7 @@ public: Context *on_finish); void execute_state_callback(StateTransition::State state); - void state_callback_complete(StateTransition::State state, bool delete_context); + void state_callback_complete(StateTransition::State state); void execute_completion_callback(int r); StateTransition::ActionType get_action_type() const; diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc index c4b5b9bd41b..b809324918f 100644 --- a/src/tools/rbd_mirror/image_map/Policy.cc +++ b/src/tools/rbd_mirror/image_map/Policy.cc @@ -121,7 +121,7 @@ void Policy::remove_instances(const std::vector &instance_ids, for (auto const &instance : instance_ids) { dout(10) << ": removing instance_id=" << instance << dendl; for (auto const &global_image_id : m_map[instance]) { - if (!remove_pending(global_image_id)) { + if (!actions_pending(global_image_id, m_map_lock)) { remap_global_image_ids->emplace(global_image_id); } } @@ -146,14 +146,16 @@ void Policy::start_next_action(const std::string &global_image_id) { StateTransition::ActionType action_type = action.get_action_type(); action_state.transition = StateTransition::transit(action_type, action_state.current_state); + StateTransition::State next_state = action_state.transition.next_state; + dout(10) << ": global_image_id=" << global_image_id << ", action=" << action - << ", currrent_state=" << action_state.current_state << ", next_state=" - << action_state.transition.next_state << dendl; + << ", current_state=" << action_state.current_state << ", next_state=" + << next_state << dendl; // invoke state context callback - pre_execute_state_callback(global_image_id, action_type, action_state.transition.next_state); + pre_execute_state_callback(global_image_id, action_type, next_state); m_map_lock.put_write(); - action.execute_state_callback(action_state.transition.next_state); + action.execute_state_callback(next_state); m_map_lock.get_write(); } @@ -171,9 +173,8 @@ bool Policy::finish_action(const std::string &global_image_id, int r) { Action &action = action_state.actions.front(); bool complete; - if (r == 0) { - post_execute_state_callback(global_image_id, action_state.transition.next_state); - complete = perform_transition(&action_state, &action); + if (can_transit(action_state, r)) { + complete = perform_transition(global_image_id, &action_state, &action, r != 0); } else { complete = abort_or_retry(&action_state, &action); } @@ -204,47 +205,60 @@ bool Policy::queue_action(const std::string &global_image_id, const Action &acti return it->second.actions.size() == 1; } -bool Policy::is_transition_complete(StateTransition::ActionType action_type, StateTransition::State *state) { - assert(m_map_lock.is_locked()); - - dout(10) << ": action_type=" << action_type << ", state=" << *state << dendl; +void Policy::rollback(ActionState *action_state) { + dout(20) << dendl; + assert(m_map_lock.is_wlocked()); - bool complete = false; - switch (action_type) { - case StateTransition::ACTION_TYPE_ADD: - case StateTransition::ACTION_TYPE_SHUFFLE: - complete = *state == StateTransition::STATE_ASSOCIATED; - break; - case StateTransition::ACTION_TYPE_REMOVE: - if (*state == StateTransition::STATE_REMOVE_MAPPING) { - complete = true; - *state = StateTransition::STATE_UNASSIGNED; - } - break; - default: - derr << "UNKNOWN (" << static_cast(action_type) << ")" << dendl; - assert(false); - } + assert(action_state->transition.error_state); + StateTransition::State state = action_state->transition.error_state.get(); - return complete; + dout(10) << ": rolling back state=" << action_state->current_state << " -> " + << state << dendl; + action_state->current_state = state; } -bool Policy::perform_transition(ActionState *action_state, Action *action) { +bool Policy::advance(const std::string &global_image_id, + ActionState *action_state, Action *action) { dout(20) << dendl; assert(m_map_lock.is_wlocked()); StateTransition::State state = action_state->transition.next_state; - // delete context based on retry_on_error flag - action->state_callback_complete(state, action_state->transition.retry_on_error); + if (!is_state_retriable(state)) { + action->state_callback_complete(state); + } - bool complete = is_transition_complete(action->get_action_type(), &state); - dout(10) << ": advancing state: " << action_state->current_state << " -> " - << state << dendl; + post_execute_state_callback(global_image_id, state); + bool reached_final_state = false; + if (action_state->transition.final_state) { + reached_final_state = true; + state = action_state->transition.final_state.get(); + assert(is_idle_state(state)); + } + + dout(10) << ": advancing state=" << action_state->current_state << " -> " + << state << dendl; action_state->current_state = state; - if (is_idle_state(state)) { - action_state->last_idle_state = state; - dout(10) << ": tranisition reached idle state=" << state << dendl; + + return reached_final_state; +} + +bool Policy::perform_transition(const std::string &global_image_id, + ActionState *action_state, Action *action, bool transition_error) { + dout(20) << dendl; + assert(m_map_lock.is_wlocked()); + + bool complete = false; + if (transition_error) { + rollback(action_state); + } else { + complete = advance(global_image_id, action_state, action); + } + + if (is_idle_state(action_state->current_state)) { + action_state->last_idle_state = action_state->current_state; + dout(10) << ": transition reached idle state=" << action_state->current_state + << dendl; } return complete; @@ -254,10 +268,13 @@ bool Policy::abort_or_retry(ActionState *action_state, Action *action) { dout(20) << dendl; assert(m_map_lock.is_wlocked()); - bool complete = !action_state->transition.retry_on_error; + StateTransition::State state = action_state->transition.next_state; + bool complete = !is_state_retriable(state); + if (complete) { - // we aborted, so the context need not be freed - action->state_callback_complete(action_state->transition.next_state, false); + // we aborted, so the context need not be freed later + action->state_callback_complete(state); + if (action_state->last_idle_state) { dout(10) << ": using last idle state=" << action_state->last_idle_state.get() << " as current state" << dendl; @@ -304,8 +321,8 @@ void Policy::post_execute_state_callback(const std::string &global_image_id, Sta case StateTransition::STATE_UPDATE_MAPPING: case StateTransition::STATE_REMOVE_MAPPING: break; - case StateTransition::STATE_UNASSIGNED: default: + case StateTransition::STATE_UNASSIGNED: assert(false); } } @@ -320,20 +337,6 @@ bool Policy::actions_pending(const std::string &global_image_id, const RWLock &l return !it->second.actions.empty(); } -bool Policy::remove_pending(const std::string &global_image_id) { - dout(20) << ": global_image_id=" << global_image_id << dendl; - assert(m_map_lock.is_locked()); - - auto it = m_actions.find(global_image_id); - assert(it != m_actions.end()); - - auto r_it = std::find_if(it->second.actions.rbegin(), it->second.actions.rend(), - [](const Action &action) { - return (action.get_action_type() == StateTransition::ACTION_TYPE_REMOVE); - }); - return r_it != it->second.actions.rend(); -} - Policy::LookupInfo Policy::lookup(const std::string &global_image_id, const RWLock &lock) { assert(m_map_lock.is_locked()); diff --git a/src/tools/rbd_mirror/image_map/Policy.h b/src/tools/rbd_mirror/image_map/Policy.h index 6a715a6363a..a4467fac4a9 100644 --- a/src/tools/rbd_mirror/image_map/Policy.h +++ b/src/tools/rbd_mirror/image_map/Policy.h @@ -71,6 +71,19 @@ private: utime_t map_time; // (re)mapped time }; + // for the lack of a better function name + bool is_state_retriable(StateTransition::State state) { + return state == StateTransition::STATE_UPDATE_MAPPING || + state == StateTransition::STATE_REMOVE_MAPPING || + state == StateTransition::STATE_ASSOCIATED; + } + // can the state machine transit advance (on success) or rollback + // (on failure). + bool can_transit(const ActionState &action_state, int r) { + assert(m_map_lock.is_locked()); + return r == 0 || action_state.transition.error_state; + } + void set_image_mapped_timestamp(const std::string &global_image_id, utime_t time) { assert(m_map_lock.is_wlocked()); @@ -91,7 +104,8 @@ private: std::set m_dead_instances; bool is_idle_state(StateTransition::State state) { - if (state == StateTransition::STATE_ASSOCIATED || + if (state == StateTransition::STATE_UNASSIGNED || + state == StateTransition::STATE_ASSOCIATED || state == StateTransition::STATE_DISASSOCIATED) { return true; } @@ -115,7 +129,6 @@ private: bool queue_action(const std::string &global_image_id, const Action &action); bool actions_pending(const std::string &global_image_id, const RWLock &lock); - bool remove_pending(const std::string &glolbal_image_id); LookupInfo lookup(const std::string &global_image_id, const RWLock &lock); void map(const std::string &global_image_id, @@ -132,8 +145,11 @@ private: StateTransition::ActionType action_type, StateTransition::State state); void post_execute_state_callback(const std::string &global_image_id, StateTransition::State state); - bool is_transition_complete(StateTransition::ActionType action_type, StateTransition::State *state); - bool perform_transition(ActionState *action_state, Action *action); + void rollback(ActionState *action_state); + bool advance(const std::string &global_image_id, ActionState *action_state, Action *action); + + bool perform_transition(const std::string &global_image_id, ActionState *action_state, + Action *action, bool transition_error); bool abort_or_retry(ActionState *action_state, Action *action); protected: diff --git a/src/tools/rbd_mirror/image_map/StateTransition.cc b/src/tools/rbd_mirror/image_map/StateTransition.cc index 4d7cfd9d773..b075e7f9a46 100644 --- a/src/tools/rbd_mirror/image_map/StateTransition.cc +++ b/src/tools/rbd_mirror/image_map/StateTransition.cc @@ -55,17 +55,17 @@ std::ostream &operator<<(std::ostream &os, const StateTransition::State &state) const StateTransition::TransitionTable StateTransition::transition_table[] = { // action_type current_state Transition // ------------------------------------------------------------------------------- - ACTION_TYPE_ADD, STATE_UNASSIGNED, Transition(STATE_UPDATE_MAPPING, true), - ACTION_TYPE_ADD, STATE_ASSOCIATED, Transition(STATE_ASSOCIATED, false), - ACTION_TYPE_ADD, STATE_DISASSOCIATED, Transition(STATE_UPDATE_MAPPING, true), - ACTION_TYPE_ADD, STATE_UPDATE_MAPPING, Transition(STATE_ASSOCIATED, false), + ACTION_TYPE_ADD, STATE_UNASSIGNED, Transition(STATE_UPDATE_MAPPING), + ACTION_TYPE_ADD, STATE_UPDATE_MAPPING, Transition(STATE_ASSOCIATED, STATE_ASSOCIATED, + STATE_UNASSIGNED), - ACTION_TYPE_REMOVE, STATE_ASSOCIATED, Transition(STATE_DISASSOCIATED, false), - ACTION_TYPE_REMOVE, STATE_DISASSOCIATED, Transition(STATE_REMOVE_MAPPING, true), + ACTION_TYPE_REMOVE, STATE_ASSOCIATED, Transition(STATE_DISASSOCIATED), + ACTION_TYPE_REMOVE, STATE_DISASSOCIATED, Transition(STATE_REMOVE_MAPPING, STATE_UNASSIGNED), - ACTION_TYPE_SHUFFLE, STATE_ASSOCIATED, Transition(STATE_DISASSOCIATED, false), - ACTION_TYPE_SHUFFLE, STATE_DISASSOCIATED, Transition(STATE_UPDATE_MAPPING, true), - ACTION_TYPE_SHUFFLE, STATE_UPDATE_MAPPING, Transition(STATE_ASSOCIATED, false), + ACTION_TYPE_SHUFFLE, STATE_ASSOCIATED, Transition(STATE_DISASSOCIATED), + ACTION_TYPE_SHUFFLE, STATE_DISASSOCIATED, Transition(STATE_UPDATE_MAPPING), + ACTION_TYPE_SHUFFLE, STATE_UPDATE_MAPPING, Transition(STATE_ASSOCIATED, STATE_ASSOCIATED, + STATE_DISASSOCIATED), }; const StateTransition::Transition &StateTransition::transit(ActionType action_type, State state) { diff --git a/src/tools/rbd_mirror/image_map/StateTransition.h b/src/tools/rbd_mirror/image_map/StateTransition.h index c9f6ee544e5..7cc9db051ea 100644 --- a/src/tools/rbd_mirror/image_map/StateTransition.h +++ b/src/tools/rbd_mirror/image_map/StateTransition.h @@ -4,6 +4,8 @@ #ifndef CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H #define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H +#include + namespace rbd { namespace mirror { namespace image_map { @@ -27,13 +29,23 @@ public: struct Transition { State next_state; - bool retry_on_error; + boost::optional final_state; + boost::optional error_state; - Transition() { + Transition() + : Transition(STATE_UNASSIGNED, boost::none, boost::none) { + } + Transition(State next_state) + : Transition(next_state, boost::none, boost::none) { + } + Transition(State next_state, State final_state) + : Transition(next_state, final_state, boost::none) { } - Transition(State next_state, bool retry_on_error) + Transition(State next_state, boost::optional final_state, + boost::optional error_state) : next_state(next_state), - retry_on_error(retry_on_error) { + final_state(final_state), + error_state(error_state) { } }; -- 2.39.5