]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rbd-mirror: rollback state transitions in image policy
authorVenky Shankar <vshankar@redhat.com>
Mon, 4 Dec 2017 06:29:48 +0000 (01:29 -0500)
committerVenky Shankar <vshankar@redhat.com>
Thu, 11 Jan 2018 15:47:40 +0000 (10:47 -0500)
Introduce an error path in state transitions for reinitiating
image mapping possibly to a new peer. Also, a minor cleanup
in remove_instances() to check for pending actions if any
(rather than scanning the actions list for possible remove
action).

Signed-off-by: Venky Shankar <vshankar@redhat.com>
src/test/rbd_mirror/test_mock_ImageMap.cc
src/tools/rbd_mirror/ImageMap.cc
src/tools/rbd_mirror/ImageMap.h
src/tools/rbd_mirror/image_map/Action.cc
src/tools/rbd_mirror/image_map/Action.h
src/tools/rbd_mirror/image_map/Policy.cc
src/tools/rbd_mirror/image_map/Policy.h
src/tools/rbd_mirror/image_map/StateTransition.cc
src/tools/rbd_mirror/image_map/StateTransition.h

index 2514c686b4ace3fc7f8620cc1e83950caf759b81..090fb94f74c226752f9593152a68d60df92f2845 100644 (file)
@@ -1029,7 +1029,7 @@ TEST_F(TestMockImageMap, RemoveInstanceWithRemoveImage) {
   ASSERT_EQ(0, when_shut_down(mock_image_map.get()));
 }
 
-TEST_F(TestMockImageMap, AddErrorWithRetryAndResume) {
+TEST_F(TestMockImageMap, AddErrorAndRemoveImage) {
   MockThreads mock_threads(m_threads);
   expect_work_queue(mock_threads);
 
@@ -1052,11 +1052,6 @@ TEST_F(TestMockImageMap, AddErrorWithRetryAndResume) {
   };
   std::set<std::string> global_image_ids_ack(global_image_ids);
 
-  std::set<std::string> remove_global_image_ids{
-    "global id 1"
-  };
-  std::set<std::string> remove_global_image_ids_ack(remove_global_image_ids);
-
   // UPDATE_MAPPING+ACQUIRE
   expect_add_event(mock_threads);
   MockUpdateRequest mock_update_request;
@@ -1088,22 +1083,29 @@ TEST_F(TestMockImageMap, AddErrorWithRetryAndResume) {
                          mock_listener, shuffled_global_image_ids, 0);
   remote_peer_ack_listener_wait(mock_image_map.get(), shuffled_global_image_ids, 0);
 
+  mock_image_map->update_instances_removed({"9876"});
+
   // instance blacklisted -- ACQUIRE request fails
-  remote_peer_ack_nowait(mock_image_map.get(), shuffled_global_image_ids, -EBLACKLISTED);
+  update_map_and_acquire(mock_threads, mock_update_request,
+                         mock_listener, shuffled_global_image_ids, 0);
+  remote_peer_ack_listener_wait(mock_image_map.get(), shuffled_global_image_ids, -EBLACKLISTED);
 
-  expect_add_event(mock_threads);
-  expect_update_request(mock_update_request, 0);
-  expect_add_event(mock_threads);
-  listener_acquire_images(mock_listener, shuffled_global_image_ids);
+  // new peer acks acquire request
+  remote_peer_ack_nowait(mock_image_map.get(), shuffled_global_image_ids, 0);
+  wait_for_scheduled_task();
 
-  // remove the dead instance
-  mock_image_map->update_instances_removed({"9876"});
+  std::set<std::string> shuffled_global_image_ids_ack(shuffled_global_image_ids);
 
-  ASSERT_TRUE(wait_for_map_update(1));
-  ASSERT_TRUE(wait_for_listener_notify(shuffled_global_image_ids.size()));
+  // remove image
+  listener_remove_images(mock_listener, "uuid1", shuffled_global_image_ids);
+  expect_add_event(mock_threads);
+  listener_release_images(mock_listener, shuffled_global_image_ids);
+  update_map_request(mock_threads, mock_update_request, shuffled_global_image_ids, 0);
 
-  // remote peer ACKs image acquire request
-  remote_peer_ack_nowait(mock_image_map.get(), shuffled_global_image_ids, 0);
+  mock_image_map->update_images("uuid1", {}, std::move(shuffled_global_image_ids));
+  ASSERT_TRUE(wait_for_listener_notify(shuffled_global_image_ids_ack.size() * 2));
+
+  remote_peer_ack_wait(mock_image_map.get(), shuffled_global_image_ids_ack, 0);
 
   wait_for_scheduled_task();
   ASSERT_EQ(0, when_shut_down(mock_image_map.get()));
index 48f34cf3dcf47d5f9ddcc0774f0eb5eba8f2eecd..4e5b851360fe7e2e06450912842184bd9e2410be 100644 (file)
@@ -324,9 +324,7 @@ void ImageMap<I>::schedule_add_action(const std::string &global_image_id) {
 
   // in order of state-machine execution, so its easier to follow
   Context *on_update = new C_UpdateMap(this, global_image_id);
-  Context *on_acquire = new FunctionContext([this, global_image_id](int r) {
-      queue_acquire_image(global_image_id);
-    });
+  Context *on_acquire = new C_AcquireImage(this, global_image_id);
   Context *on_finish = new FunctionContext([this, global_image_id](int r) {
       handle_add_action(global_image_id, r);
     });
@@ -365,9 +363,7 @@ void ImageMap<I>::schedule_shuffle_action(const std::string &global_image_id) {
       queue_release_image(global_image_id);
     });
   Context *on_update = new C_UpdateMap(this, global_image_id);
-  Context *on_acquire = new FunctionContext([this, global_image_id](int r) {
-      queue_acquire_image(global_image_id);
-    });
+  Context *on_acquire = new C_AcquireImage(this, global_image_id);
   Context *on_finish = new FunctionContext([this, global_image_id](int r) {
       handle_shuffle_action(global_image_id, r);
     });
index b1fb891c8e34009802df1955f2e6e7d5036218d1..a10b84060513190eabd757b98c57bf2a016097b0 100644 (file)
@@ -155,6 +155,25 @@ private:
     }
   };
 
+  struct C_AcquireImage : Context {
+    ImageMap *image_map;
+    std::string global_image_id;
+
+    C_AcquireImage(ImageMap *image_map, const std::string &global_image_id)
+      : image_map(image_map),
+        global_image_id(global_image_id) {
+    }
+
+    void finish(int r) override {
+      image_map->queue_acquire_image(global_image_id);
+    }
+
+    // maybe called more than once
+    void complete(int r) override {
+      finish(r);
+    }
+  };
+
   // async op-tracker helper routines
   void start_async_op() {
     m_async_op_tracker.start_op();
index 8fd19e8ef87170553e2e78c5433a2ca0fa665737..12055d06198b113fcd5ec0788e0ef37712dd0ed9 100644 (file)
@@ -58,16 +58,10 @@ void Action::execute_state_callback(StateTransition::State state) {
   }
 }
 
-void Action::state_callback_complete(StateTransition::State state, bool delete_context) {
-  Context *on_state = nullptr;
-
+void Action::state_callback_complete(StateTransition::State state) {
   auto it = context_map.find(state);
   if (it != context_map.end()) {
-    std::swap(it->second, on_state);
-  }
-
-  if (on_state && delete_context) {
-    delete on_state;
+    it->second = nullptr;
   }
 }
 
index ba9c2c2976345db05d8cc9a03c8d83953a2c7243..6414f66517a2bfadacdf9074ea49480f626df537 100644 (file)
@@ -21,7 +21,7 @@ public:
                                       Context *on_finish);
 
   void execute_state_callback(StateTransition::State state);
-  void state_callback_complete(StateTransition::State state, bool delete_context);
+  void state_callback_complete(StateTransition::State state);
   void execute_completion_callback(int r);
 
   StateTransition::ActionType get_action_type() const;
index c4b5b9bd41b673f16466f7e943af434f1c6c6ca6..b809324918ff1b841f8be0b20e7f201bbb19f9e3 100644 (file)
@@ -121,7 +121,7 @@ void Policy::remove_instances(const std::vector<std::string> &instance_ids,
   for (auto const &instance : instance_ids) {
     dout(10) << ": removing instance_id=" << instance << dendl;
     for (auto const &global_image_id : m_map[instance]) {
-      if (!remove_pending(global_image_id)) {
+      if (!actions_pending(global_image_id, m_map_lock)) {
         remap_global_image_ids->emplace(global_image_id);
       }
     }
@@ -146,14 +146,16 @@ void Policy::start_next_action(const std::string &global_image_id) {
   StateTransition::ActionType action_type = action.get_action_type();
   action_state.transition = StateTransition::transit(action_type, action_state.current_state);
 
+  StateTransition::State next_state = action_state.transition.next_state;
+
   dout(10) << ": global_image_id=" << global_image_id << ", action=" << action
-           << ", currrent_state=" << action_state.current_state << ", next_state="
-           << action_state.transition.next_state << dendl;
+           << ", current_state=" << action_state.current_state << ", next_state="
+           << next_state << dendl;
 
   // invoke state context callback
-  pre_execute_state_callback(global_image_id, action_type, action_state.transition.next_state);
+  pre_execute_state_callback(global_image_id, action_type, next_state);
   m_map_lock.put_write();
-  action.execute_state_callback(action_state.transition.next_state);
+  action.execute_state_callback(next_state);
   m_map_lock.get_write();
 }
 
@@ -171,9 +173,8 @@ bool Policy::finish_action(const std::string &global_image_id, int r) {
   Action &action = action_state.actions.front();
 
   bool complete;
-  if (r == 0) {
-    post_execute_state_callback(global_image_id, action_state.transition.next_state);
-    complete = perform_transition(&action_state, &action);
+  if (can_transit(action_state, r)) {
+    complete = perform_transition(global_image_id, &action_state, &action, r != 0);
   } else {
     complete = abort_or_retry(&action_state, &action);
   }
@@ -204,47 +205,60 @@ bool Policy::queue_action(const std::string &global_image_id, const Action &acti
   return it->second.actions.size() == 1;
 }
 
-bool Policy::is_transition_complete(StateTransition::ActionType action_type, StateTransition::State *state) {
-  assert(m_map_lock.is_locked());
-
-  dout(10) << ": action_type=" << action_type << ", state=" << *state << dendl;
+void Policy::rollback(ActionState *action_state) {
+  dout(20) << dendl;
+  assert(m_map_lock.is_wlocked());
 
-  bool complete = false;
-  switch (action_type) {
-  case StateTransition::ACTION_TYPE_ADD:
-  case StateTransition::ACTION_TYPE_SHUFFLE:
-    complete = *state == StateTransition::STATE_ASSOCIATED;
-    break;
-  case StateTransition::ACTION_TYPE_REMOVE:
-    if (*state == StateTransition::STATE_REMOVE_MAPPING) {
-      complete = true;
-      *state = StateTransition::STATE_UNASSIGNED;
-    }
-    break;
-  default:
-    derr << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")" << dendl;
-    assert(false);
-  }
+  assert(action_state->transition.error_state);
+  StateTransition::State state = action_state->transition.error_state.get();
 
-  return complete;
+  dout(10) << ": rolling back state=" << action_state->current_state << " -> "
+           << state << dendl;
+  action_state->current_state = state;
 }
 
-bool Policy::perform_transition(ActionState *action_state, Action *action) {
+bool Policy::advance(const std::string &global_image_id,
+                     ActionState *action_state, Action *action) {
   dout(20) << dendl;
   assert(m_map_lock.is_wlocked());
 
   StateTransition::State state = action_state->transition.next_state;
-  // delete context based on retry_on_error flag
-  action->state_callback_complete(state, action_state->transition.retry_on_error);
+  if (!is_state_retriable(state)) {
+    action->state_callback_complete(state);
+  }
 
-  bool complete = is_transition_complete(action->get_action_type(), &state);
-  dout(10) << ": advancing state: " << action_state->current_state << " -> "
-           << state << dendl;
+  post_execute_state_callback(global_image_id, state);
 
+  bool reached_final_state = false;
+  if (action_state->transition.final_state) {
+    reached_final_state = true;
+    state = action_state->transition.final_state.get();
+    assert(is_idle_state(state));
+  }
+
+  dout(10) << ": advancing state=" << action_state->current_state << " -> "
+           << state << dendl;
   action_state->current_state = state;
-  if (is_idle_state(state)) {
-    action_state->last_idle_state = state;
-    dout(10) << ": tranisition reached idle state=" << state << dendl;
+
+  return reached_final_state;
+}
+
+bool Policy::perform_transition(const std::string &global_image_id,
+                                ActionState *action_state, Action *action, bool transition_error) {
+  dout(20) << dendl;
+  assert(m_map_lock.is_wlocked());
+
+  bool complete = false;
+  if (transition_error) {
+    rollback(action_state);
+   } else {
+    complete = advance(global_image_id, action_state, action);
+  }
+
+  if (is_idle_state(action_state->current_state)) {
+    action_state->last_idle_state = action_state->current_state;
+    dout(10) << ": transition reached idle state=" << action_state->current_state
+             << dendl;
   }
 
   return complete;
@@ -254,10 +268,13 @@ bool Policy::abort_or_retry(ActionState *action_state, Action *action) {
   dout(20) << dendl;
   assert(m_map_lock.is_wlocked());
 
-  bool complete = !action_state->transition.retry_on_error;
+  StateTransition::State state = action_state->transition.next_state;
+  bool complete = !is_state_retriable(state);
+
   if (complete) {
-    // we aborted, so the context need not be freed
-    action->state_callback_complete(action_state->transition.next_state, false);
+    // we aborted, so the context need not be freed later
+    action->state_callback_complete(state);
+
     if (action_state->last_idle_state) {
       dout(10) << ": using last idle state=" << action_state->last_idle_state.get()
                << " as current state" << dendl;
@@ -304,8 +321,8 @@ void Policy::post_execute_state_callback(const std::string &global_image_id, Sta
   case StateTransition::STATE_UPDATE_MAPPING:
   case StateTransition::STATE_REMOVE_MAPPING:
     break;
-  case StateTransition::STATE_UNASSIGNED:
   default:
+  case StateTransition::STATE_UNASSIGNED:
     assert(false);
   }
 }
@@ -320,20 +337,6 @@ bool Policy::actions_pending(const std::string &global_image_id, const RWLock &l
   return !it->second.actions.empty();
 }
 
-bool Policy::remove_pending(const std::string &global_image_id) {
-  dout(20) << ": global_image_id=" << global_image_id << dendl;
-  assert(m_map_lock.is_locked());
-
-  auto it = m_actions.find(global_image_id);
-  assert(it != m_actions.end());
-
-  auto r_it = std::find_if(it->second.actions.rbegin(), it->second.actions.rend(),
-                           [](const Action &action) {
-      return (action.get_action_type() == StateTransition::ACTION_TYPE_REMOVE);
-    });
-  return r_it != it->second.actions.rend();
-}
-
 Policy::LookupInfo Policy::lookup(const std::string &global_image_id, const RWLock &lock) {
   assert(m_map_lock.is_locked());
 
index 6a715a6363a0ec2e81ff50ec724a7bcf24d9893f..a4467fac4a93fef367d8fa965643ecfe388aa23f 100644 (file)
@@ -71,6 +71,19 @@ private:
     utime_t map_time;                                                         // (re)mapped time
   };
 
+  // for the lack of a better function name
+  bool is_state_retriable(StateTransition::State state) {
+    return state == StateTransition::STATE_UPDATE_MAPPING ||
+           state == StateTransition::STATE_REMOVE_MAPPING ||
+           state == StateTransition::STATE_ASSOCIATED;
+  }
+  // can the state machine transit advance (on success) or rollback
+  // (on failure).
+  bool can_transit(const ActionState &action_state, int r) {
+    assert(m_map_lock.is_locked());
+    return r == 0 || action_state.transition.error_state;
+  }
+
   void set_image_mapped_timestamp(const std::string &global_image_id, utime_t time) {
     assert(m_map_lock.is_wlocked());
 
@@ -91,7 +104,8 @@ private:
   std::set<std::string> m_dead_instances;
 
   bool is_idle_state(StateTransition::State state) {
-    if (state == StateTransition::STATE_ASSOCIATED ||
+    if (state == StateTransition::STATE_UNASSIGNED ||
+        state == StateTransition::STATE_ASSOCIATED ||
         state == StateTransition::STATE_DISASSOCIATED) {
       return true;
     }
@@ -115,7 +129,6 @@ private:
 
   bool queue_action(const std::string &global_image_id, const Action &action);
   bool actions_pending(const std::string &global_image_id, const RWLock &lock);
-  bool remove_pending(const std::string &glolbal_image_id);
 
   LookupInfo lookup(const std::string &global_image_id, const RWLock &lock);
   void map(const std::string &global_image_id,
@@ -132,8 +145,11 @@ private:
                                   StateTransition::ActionType action_type, StateTransition::State state);
   void post_execute_state_callback(const std::string &global_image_id, StateTransition::State state);
 
-  bool is_transition_complete(StateTransition::ActionType action_type, StateTransition::State *state);
-  bool perform_transition(ActionState *action_state, Action *action);
+  void rollback(ActionState *action_state);
+  bool advance(const std::string &global_image_id, ActionState *action_state, Action *action);
+
+  bool perform_transition(const std::string &global_image_id, ActionState *action_state,
+                          Action *action, bool transition_error);
   bool abort_or_retry(ActionState *action_state, Action *action);
 
 protected:
index 4d7cfd9d773facad4fd389c0c327e626fed140ec..b075e7f9a469d3c0e7f8489a1900ad98cf4a6f04 100644 (file)
@@ -55,17 +55,17 @@ std::ostream &operator<<(std::ostream &os, const StateTransition::State &state)
 const StateTransition::TransitionTable StateTransition::transition_table[] = {
   // action_type         current_state                   Transition
   // -------------------------------------------------------------------------------
-  ACTION_TYPE_ADD,     STATE_UNASSIGNED,      Transition(STATE_UPDATE_MAPPING, true),
-  ACTION_TYPE_ADD,     STATE_ASSOCIATED,      Transition(STATE_ASSOCIATED,     false),
-  ACTION_TYPE_ADD,     STATE_DISASSOCIATED,   Transition(STATE_UPDATE_MAPPING, true),
-  ACTION_TYPE_ADD,     STATE_UPDATE_MAPPING,  Transition(STATE_ASSOCIATED,     false),
+  ACTION_TYPE_ADD,     STATE_UNASSIGNED,      Transition(STATE_UPDATE_MAPPING),
+  ACTION_TYPE_ADD,     STATE_UPDATE_MAPPING,  Transition(STATE_ASSOCIATED, STATE_ASSOCIATED,
+                                                         STATE_UNASSIGNED),
 
-  ACTION_TYPE_REMOVE,  STATE_ASSOCIATED,      Transition(STATE_DISASSOCIATED,  false),
-  ACTION_TYPE_REMOVE,  STATE_DISASSOCIATED,   Transition(STATE_REMOVE_MAPPING, true),
+  ACTION_TYPE_REMOVE,  STATE_ASSOCIATED,      Transition(STATE_DISASSOCIATED),
+  ACTION_TYPE_REMOVE,  STATE_DISASSOCIATED,   Transition(STATE_REMOVE_MAPPING, STATE_UNASSIGNED),
 
-  ACTION_TYPE_SHUFFLE, STATE_ASSOCIATED,      Transition(STATE_DISASSOCIATED,  false),
-  ACTION_TYPE_SHUFFLE, STATE_DISASSOCIATED,   Transition(STATE_UPDATE_MAPPING, true),
-  ACTION_TYPE_SHUFFLE, STATE_UPDATE_MAPPING,  Transition(STATE_ASSOCIATED,     false),
+  ACTION_TYPE_SHUFFLE, STATE_ASSOCIATED,      Transition(STATE_DISASSOCIATED),
+  ACTION_TYPE_SHUFFLE, STATE_DISASSOCIATED,   Transition(STATE_UPDATE_MAPPING),
+  ACTION_TYPE_SHUFFLE, STATE_UPDATE_MAPPING,  Transition(STATE_ASSOCIATED, STATE_ASSOCIATED,
+                                                         STATE_DISASSOCIATED),
 };
 
 const StateTransition::Transition &StateTransition::transit(ActionType action_type, State state) {
index c9f6ee544e54ca3dc9cf7004dd99e37537f9ad05..7cc9db051ea9a58e607bb6bef50014615673ea8a 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
 #define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
 
+#include <boost/optional.hpp>
+
 namespace rbd {
 namespace mirror {
 namespace image_map {
@@ -27,13 +29,23 @@ public:
 
   struct Transition {
     State next_state;
-    bool retry_on_error;
+    boost::optional<State> final_state;
+    boost::optional<State> error_state;
 
-    Transition() {
+    Transition()
+      : Transition(STATE_UNASSIGNED, boost::none, boost::none) {
+    }
+    Transition(State next_state)
+      : Transition(next_state, boost::none, boost::none) {
+    }
+    Transition(State next_state, State final_state)
+      : Transition(next_state, final_state, boost::none) {
     }
-    Transition(State next_state, bool retry_on_error)
+    Transition(State next_state, boost::optional<State> final_state,
+               boost::optional<State> error_state)
       : next_state(next_state),
-        retry_on_error(retry_on_error) {
+        final_state(final_state),
+        error_state(error_state) {
     }
   };