]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd-mirror: abstract policy class for mapping images to instance
authorVenky Shankar <vshankar@redhat.com>
Wed, 13 Sep 2017 10:55:25 +0000 (06:55 -0400)
committerVenky Shankar <vshankar@redhat.com>
Mon, 13 Nov 2017 13:27:43 +0000 (08:27 -0500)
Also, a "simple" policy implementation that maps M images to
N instances (M/N per rbd mirror daemon instance).

Signed-off-by: Venky Shankar <vshankar@redhat.com>
src/common/options.cc
src/tools/rbd_mirror/CMakeLists.txt
src/tools/rbd_mirror/image_map/Action.cc [new file with mode: 0644]
src/tools/rbd_mirror/image_map/Action.h [new file with mode: 0644]
src/tools/rbd_mirror/image_map/Policy.cc [new file with mode: 0644]
src/tools/rbd_mirror/image_map/Policy.h [new file with mode: 0644]
src/tools/rbd_mirror/image_map/SimplePolicy.cc [new file with mode: 0644]
src/tools/rbd_mirror/image_map/SimplePolicy.h [new file with mode: 0644]
src/tools/rbd_mirror/image_map/StateTransition.cc [new file with mode: 0644]
src/tools/rbd_mirror/image_map/StateTransition.h [new file with mode: 0644]

index 6064e0c76bf69aeb6815e3f800c6f00bafc245dd..2f32e45c3b65084bcc892af505f3f125b90d1fd3 100644 (file)
@@ -5476,6 +5476,15 @@ static std::vector<Option> get_rbd_mirror_options() {
     Option("rbd_mirror_leader_max_acquire_attempts_before_break", Option::TYPE_INT, Option::LEVEL_ADVANCED)
     .set_default(3)
     .set_description("number of failed attempts to acquire lock after missing heartbeats before breaking lock"),
+
+    Option("rbd_mirror_image_policy_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("simple")
+    .set_enum_allowed({"simple"})
+    .set_description("policy type for mapping images to instances"),
+
+    Option("rbd_mirror_image_policy_migration_throttle", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_description("number of seconds after which an image can be reshuffled (migrated) again"),
   });
 }
 
index 103969d892531eed794d26ed5684e2f2c27b7d7b..2a558a249a2bc7c0574d7840327b02f713efecd3 100644 (file)
@@ -19,7 +19,11 @@ set(rbd_mirror_internal
   ServiceDaemon.cc
   Threads.cc
   types.cc
+  image_map/Action.cc
   image_map/LoadRequest.cc
+  image_map/Policy.cc
+  image_map/SimplePolicy.cc
+  image_map/StateTransition.cc
   image_map/UpdateRequest.cc
   image_replayer/BootstrapRequest.cc
   image_replayer/CloseImageRequest.cc
diff --git a/src/tools/rbd_mirror/image_map/Action.cc b/src/tools/rbd_mirror/image_map/Action.cc
new file mode 100644 (file)
index 0000000..a31dc6d
--- /dev/null
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <ostream>
+#include "include/Context.h"
+#include "Action.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+std::ostream &operator<<(std::ostream &os, const Action &action) {
+  os << "[action_type=" << action.get_action_type() << "]";
+  return os;
+}
+
+Action::Action(StateTransition::ActionType action_type)
+  : action_type(action_type) {
+}
+
+Action Action::create_add_action(Context *on_update, Context *on_acquire, Context *on_finish) {
+  Action action(StateTransition::ACTION_TYPE_ADD);
+  action.context_map.emplace(StateTransition::STATE_UPDATE_MAPPING, on_update);
+  action.context_map.emplace(StateTransition::STATE_ASSOCIATED, on_acquire);
+  action.context_map.emplace(StateTransition::STATE_COMPLETE, on_finish);
+
+  return action;
+}
+
+Action Action::create_remove_action(Context *on_release, Context *on_remove, Context *on_finish) {
+  Action action(StateTransition::ACTION_TYPE_REMOVE);
+  action.context_map.emplace(StateTransition::STATE_DISASSOCIATED, on_release);
+  action.context_map.emplace(StateTransition::STATE_REMOVE_MAPPING, on_remove);
+  action.context_map.emplace(StateTransition::STATE_COMPLETE, on_finish);
+
+  return action;
+}
+
+Action Action::create_shuffle_action(Context *on_release, Context *on_update, Context *on_acquire,
+                                     Context *on_finish) {
+  Action action(StateTransition::ACTION_TYPE_SHUFFLE);
+  action.context_map.emplace(StateTransition::STATE_DISASSOCIATED, on_release);
+  action.context_map.emplace(StateTransition::STATE_UPDATE_MAPPING, on_update);
+  action.context_map.emplace(StateTransition::STATE_ASSOCIATED, on_acquire);
+  action.context_map.emplace(StateTransition::STATE_COMPLETE, on_finish);
+
+  return action;
+}
+
+StateTransition::ActionType Action::get_action_type() const {
+  return action_type;
+}
+
+void Action::execute_state_callback(StateTransition::State state) {
+  auto it = context_map.find(state);
+  if (it != context_map.end() && it->second != nullptr) {
+    it->second->complete(0);
+  }
+}
+
+void Action::execute_completion_callback(int r) {
+  auto it = context_map.find(StateTransition::STATE_COMPLETE);
+  assert(it != context_map.end());
+
+  Context *on_finish = nullptr;
+  std::swap(it->second, on_finish); // just called once so its swap'd
+
+  if (on_finish != nullptr) {
+    on_finish->complete(r);
+  }
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/Action.h b/src/tools/rbd_mirror/image_map/Action.h
new file mode 100644 (file)
index 0000000..5cdd7ce
--- /dev/null
@@ -0,0 +1,41 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_ACTION_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_ACTION_H
+
+#include <map>
+#include "StateTransition.h"
+
+class Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+struct Action {
+public:
+  static Action create_add_action(Context *on_update, Context *on_acquire, Context *on_finish);
+  static Action create_remove_action(Context *on_release, Context *on_remove, Context *on_finish);
+  static Action create_shuffle_action(Context *on_release, Context *on_update, Context *on_acquire,
+                                      Context *on_finish);
+
+  void execute_state_callback(StateTransition::State state);
+  void execute_completion_callback(int r);
+
+  StateTransition::ActionType get_action_type() const;
+
+private:
+  Action(StateTransition::ActionType action_type);
+
+  StateTransition::ActionType action_type;                 // action type for this action
+  std::map<StateTransition::State, Context *> context_map; // map sub action type to context callback
+};
+
+std::ostream &operator<<(std::ostream &os, const Action &action);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_ACTION_H
diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc
new file mode 100644 (file)
index 0000000..612efd3
--- /dev/null
@@ -0,0 +1,417 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "Policy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << this \
+                           << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+using librbd::util::unique_lock_name;
+
+const std::string Policy::UNMAPPED_INSTANCE_ID("");
+
+Policy::Policy(librados::IoCtx &ioctx)
+  : m_ioctx(ioctx),
+    m_map_lock(unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", this)) {
+
+  // map should at least have once instance
+  std::string instance_id = stringify(ioctx.get_instance_id());
+  add_instances({instance_id}, nullptr);
+}
+
+void Policy::init(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) {
+  dout(20) << dendl;
+
+  RWLock::WLocker map_lock(m_map_lock);
+
+  for (auto const &it : image_mapping) {
+    map(it.first, it.second.instance_id, utime_t(0, 0), m_map_lock);
+  }
+}
+
+std::string Policy::lookup(const std::string &global_image_id) {
+  dout(20) << ": global_image_id=" << global_image_id << dendl;
+
+  RWLock::RLocker map_lock(m_map_lock);
+  return lookup(global_image_id, m_map_lock);
+}
+
+bool Policy::add_image(const std::string &global_image_id,
+                       Context *on_update, Context *on_acquire, Context *on_finish) {
+  dout(20) << ": global_image_id=" << global_image_id << dendl;
+
+  RWLock::WLocker map_lock(m_map_lock);
+
+  auto it = m_actions.find(global_image_id);
+  if (it == m_actions.end()) {
+    m_actions.emplace(global_image_id, ActionState());
+  }
+
+  Action action = Action::create_add_action(on_update, on_acquire, on_finish);
+  return queue_action(global_image_id, action);
+}
+
+bool Policy::remove_image(const std::string &global_image_id,
+                          Context *on_release, Context *on_remove, Context *on_finish) {
+  dout(20) << ": global_image_id=" << global_image_id << dendl;
+
+  RWLock::WLocker map_lock(m_map_lock);
+
+  on_finish = new FunctionContext([this, global_image_id, on_finish](int r) {
+      {
+        RWLock::WLocker map_lock(m_map_lock);
+        if (!actions_pending(global_image_id, m_map_lock)) {
+          m_actions.erase(global_image_id);
+        }
+      }
+
+      if (on_finish != nullptr) {
+        on_finish->complete(r);
+      }
+    });
+
+  Action action = Action::create_remove_action(on_release, on_remove, on_finish);
+  return queue_action(global_image_id, action);
+}
+
+bool Policy::shuffle_image(const std::string &global_image_id,
+                           Context *on_release, Context *on_update,
+                           Context *on_acquire, Context *on_finish) {
+  dout(20) << ": global_image_id=" << global_image_id << dendl;
+
+  RWLock::WLocker map_lock(m_map_lock);
+
+  Action action = Action::create_shuffle_action(on_release, on_update, on_acquire, on_finish);
+  return queue_action(global_image_id, action);
+}
+
+void Policy::add_instances(const std::vector<std::string> &instance_ids,
+                           std::set<std::string> *remap_global_image_ids) {
+  dout(20) << ": adding " << instance_ids.size() << " instance(s)" << dendl;
+
+  RWLock::WLocker map_lock(m_map_lock);
+
+  for (auto const &instance : instance_ids) {
+    dout(10) << ": adding instance_id=" << instance << dendl;
+    m_map.emplace(instance, std::set<std::string>{});
+  }
+
+  if (remap_global_image_ids != nullptr) {
+    do_shuffle_add_instances(instance_ids, remap_global_image_ids);
+  }
+}
+
+void Policy::remove_instances(const std::vector<std::string> &instance_ids,
+                              std::set<std::string> *remap_global_image_ids) {
+  dout(20) << ": removing " << instance_ids.size() << " instance(s)" << dendl;
+
+  RWLock::WLocker map_lock(m_map_lock);
+
+  for (auto const &instance : instance_ids) {
+    dout(10) << ": removing instance_id=" << instance << dendl;
+    for (auto const &global_image_id : m_map[instance]) {
+      if (!remove_pending(global_image_id)) {
+        remap_global_image_ids->emplace(global_image_id);
+      }
+    }
+  }
+
+  m_dead_instances.insert(instance_ids.begin(), instance_ids.end());
+}
+
+// new actions are always started from a stable (idle) state since
+// actions either complete successfully ending up in an idle state
+// or get aborted due to peer being blacklisted.
+void Policy::start_next_action(const std::string &global_image_id) {
+  RWLock::WLocker map_lock(m_map_lock);
+
+  auto it = m_actions.find(global_image_id);
+  assert(it != m_actions.end());
+  assert(!it->second.actions.empty());
+
+  ActionState &action_state = it->second;
+  Action &action = action_state.actions.front();
+
+  StateTransition::ActionType action_type = action.get_action_type();
+  action_state.transition = StateTransition::transit(action_type, action_state.current_state);
+
+  dout(10) << ": global_image_id=" << global_image_id << ", action=" << action
+           << ", currrent_state=" << action_state.current_state << ", next_state="
+           << action_state.transition.next_state << dendl;
+
+  // invoke state context callback
+  pre_execute_state_callback(global_image_id, action_type, action_state.transition.next_state);
+  m_map_lock.put_write();
+  action.execute_state_callback(action_state.transition.next_state);
+  m_map_lock.get_write();
+}
+
+bool Policy::finish_action(const std::string &global_image_id, int r) {
+  RWLock::WLocker map_lock(m_map_lock);
+
+  dout(10) << ": global_image_id=" << global_image_id << ", r=" << r
+           << dendl;
+
+  auto it = m_actions.find(global_image_id);
+  assert(it != m_actions.end());
+  assert(!it->second.actions.empty());
+
+  ActionState &action_state = it->second;
+  Action &action = action_state.actions.front();
+
+  bool complete;
+  if (r == 0) {
+    post_execute_state_callback(global_image_id, action_state.transition.next_state);
+    complete = perform_transition(&action_state, action.get_action_type());
+  } else {
+    complete = abort_or_retry(&action_state);
+  }
+
+  if (complete) {
+    dout(10) << ": completing action=" << action << dendl;
+
+    m_map_lock.put_write();
+    action.execute_completion_callback(r);
+    m_map_lock.get_write();
+
+    action_state.last_idle_state.reset();
+    action_state.actions.pop_front();
+  }
+
+  return !action_state.actions.empty();
+}
+
+bool Policy::queue_action(const std::string &global_image_id, const Action &action) {
+  dout(20) << ": global_image_id=" << global_image_id << ", action=" << action
+           << dendl;
+  assert(m_map_lock.is_wlocked());
+
+  auto it = m_actions.find(global_image_id);
+  assert(it != m_actions.end());
+
+  it->second.actions.push_back(action);
+  return it->second.actions.size() == 1;
+}
+
+bool Policy::is_transition_complete(StateTransition::ActionType action_type, StateTransition::State *state) {
+  assert(m_map_lock.is_locked());
+
+  dout(10) << ": action_type=" << action_type << ", state=" << *state << dendl;
+
+  bool complete = false;
+  switch (action_type) {
+  case StateTransition::ACTION_TYPE_ADD:
+  case StateTransition::ACTION_TYPE_SHUFFLE:
+    complete = *state == StateTransition::STATE_ASSOCIATED;
+    break;
+  case StateTransition::ACTION_TYPE_REMOVE:
+    if (*state == StateTransition::STATE_REMOVE_MAPPING) {
+      complete = true;
+      *state = StateTransition::STATE_UNASSIGNED;
+    }
+    break;
+  default:
+    derr << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")" << dendl;
+    assert(false);
+  }
+
+  return complete;
+}
+
+bool Policy::perform_transition(ActionState *action_state, StateTransition::ActionType action_type) {
+  dout(20) << dendl;
+  assert(m_map_lock.is_wlocked());
+
+  StateTransition::State state;
+
+  bool complete = is_transition_complete(action_type, &state);
+  dout(10) << ": advancing state: " << action_state->current_state << " -> "
+           << state << dendl;
+
+  action_state->current_state = state;
+  if (is_idle_state(state)) {
+    action_state->last_idle_state = state;
+    dout(10) << ": tranisition reached idle state=" << state << dendl;
+  }
+
+  return complete;
+}
+
+bool Policy::abort_or_retry(ActionState *action_state) {
+  dout(20) << dendl;
+  assert(m_map_lock.is_wlocked());
+
+  bool complete = !action_state->transition.retry_on_error;
+  if (complete && action_state->last_idle_state) {
+    dout(10) << ": using last idle state=" << action_state->last_idle_state.get()
+             << " as current state" << dendl;
+    action_state->current_state = action_state->last_idle_state.get();
+  }
+
+  return complete;
+}
+
+void Policy::pre_execute_state_callback(const std::string &global_image_id,
+                                        StateTransition::ActionType action_type,
+                                        StateTransition::State state) {
+  assert(m_map_lock.is_wlocked());
+
+  dout(10) << ": global_image_id=" << global_image_id << ", action_type="
+           << action_type << ", state=" << state << dendl;
+
+  utime_t map_time = generate_image_map_timestamp(action_type);
+  switch (state) {
+  case StateTransition::STATE_UPDATE_MAPPING:
+    map(global_image_id, map_time);
+    break;
+  case StateTransition::STATE_ASSOCIATED:
+  case StateTransition::STATE_DISASSOCIATED:
+  case StateTransition::STATE_REMOVE_MAPPING:
+    break;
+  case StateTransition::STATE_UNASSIGNED:
+  default:
+    assert(false);
+  }
+}
+
+void Policy::post_execute_state_callback(const std::string &global_image_id, StateTransition::State state) {
+  assert(m_map_lock.is_wlocked());
+
+  dout(10) << ": global_image_id=" << global_image_id << ", state=" << state << dendl;
+
+  switch (state) {
+  case StateTransition::STATE_DISASSOCIATED:
+    unmap(global_image_id);
+    break;
+  case StateTransition::STATE_ASSOCIATED:
+  case StateTransition::STATE_UPDATE_MAPPING:
+  case StateTransition::STATE_REMOVE_MAPPING:
+    break;
+  case StateTransition::STATE_UNASSIGNED:
+  default:
+    assert(false);
+  }
+}
+
+bool Policy::actions_pending(const std::string &global_image_id, const RWLock &lock) {
+  dout(20) << ": global_image_id=" << global_image_id << dendl;
+  assert(m_map_lock.is_locked());
+
+  auto it = m_actions.find(global_image_id);
+  assert(it != m_actions.end());
+
+  return !it->second.actions.empty();
+}
+
+bool Policy::remove_pending(const std::string &global_image_id) {
+  dout(20) << ": global_image_id=" << global_image_id << dendl;
+  assert(m_map_lock.is_locked());
+
+  auto it = m_actions.find(global_image_id);
+  assert(it != m_actions.end());
+
+  auto r_it = std::find_if(it->second.actions.rbegin(), it->second.actions.rend(),
+                           [](const Action &action) {
+      return (action.get_action_type() == StateTransition::ACTION_TYPE_REMOVE);
+    });
+  return r_it != it->second.actions.rend();
+}
+
+std::string Policy::lookup(const std::string &global_image_id, const RWLock &lock) {
+  assert(m_map_lock.is_locked());
+
+  for (auto it = m_map.begin(); it != m_map.end(); ++it) {
+    if (it->second.find(global_image_id) != it->second.end()) {
+      return it->first;
+    }
+  }
+
+  return UNMAPPED_INSTANCE_ID;
+}
+
+void Policy::map(const std::string &global_image_id, const std::string &instance_id,
+                 utime_t map_time, const RWLock &lock) {
+  assert(m_map_lock.is_wlocked());
+
+  auto ins = m_map[instance_id].emplace(global_image_id);
+  assert(ins.second);
+
+  auto it = m_actions.find(global_image_id);
+  assert(it != m_actions.end());
+  it->second.map_time = map_time;
+}
+
+void Policy::unmap(const std::string &global_image_id, const std::string &instance_id,
+                   const RWLock &lock) {
+  assert(m_map_lock.is_wlocked());
+
+  m_map[instance_id].erase(global_image_id);
+
+  if (is_dead_instance(instance_id) && m_map[instance_id].empty()) {
+    dout(10) << ": removing dead instance_id=" << instance_id << dendl;
+    m_map.erase(instance_id);
+    m_dead_instances.erase(instance_id);
+  }
+}
+
+void Policy::map(const std::string &global_image_id, utime_t map_time) {
+  dout(20) << ": global_image_id=" << global_image_id << dendl;
+  assert(m_map_lock.is_wlocked());
+
+  std::string instance_id = lookup(global_image_id, m_map_lock);
+  if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) {
+    return;
+  }
+  if (is_dead_instance(instance_id)) {
+    unmap(global_image_id, instance_id, m_map_lock);
+  }
+
+  instance_id = do_map(global_image_id);
+  map(global_image_id, instance_id, map_time, m_map_lock);
+}
+
+void Policy::unmap(const std::string &global_image_id) {
+  dout(20) << ": global_image_id=" << global_image_id << dendl;
+  assert(m_map_lock.is_wlocked());
+
+  std::string instance_id = lookup(global_image_id, m_map_lock);
+  if (instance_id == UNMAPPED_INSTANCE_ID) {
+    return;
+  }
+
+  unmap(global_image_id, instance_id, m_map_lock);
+}
+
+bool Policy::can_shuffle_image(const std::string &global_image_id) {
+  dout(20) << ": global_image_id=" << global_image_id << dendl;
+  assert(m_map_lock.is_locked());
+
+  CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+  int migration_throttle = cct->_conf->get_val<int64_t>("rbd_mirror_image_policy_migration_throttle");
+
+  auto it = m_actions.find(global_image_id);
+  assert(it != m_actions.end());
+
+  utime_t last_shuffled_time = it->second.map_time;
+  dout(10) << ": migration_throttle=" << migration_throttle << ", last_shuffled_time="
+           << last_shuffled_time << dendl;
+
+  utime_t now = ceph_clock_now();
+  return !actions_pending(global_image_id, m_map_lock) &&
+    !(migration_throttle > 0 && (now - last_shuffled_time < migration_throttle));
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/Policy.h b/src/tools/rbd_mirror/image_map/Policy.h
new file mode 100644 (file)
index 0000000..636f026
--- /dev/null
@@ -0,0 +1,145 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+
+#include <map>
+#include <tuple>
+#include <boost/optional.hpp>
+
+#include "common/RWLock.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+#include "Action.h"
+
+class Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class Policy {
+public:
+  Policy(librados::IoCtx &ioctx);
+
+  virtual ~Policy() {
+  }
+
+  // init -- called during initialization
+  void init(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping);
+
+  // lookup an image from the map
+  std::string lookup(const std::string &global_image_id);
+
+  // add, remove, shuffle
+  bool add_image(const std::string &global_image_id,
+                 Context *on_update, Context *on_acquire, Context *on_finish);
+  bool remove_image(const std::string &global_image_id,
+                    Context *on_release, Context *on_remove, Context *on_finish);
+  bool shuffle_image(const std::string &global_image_id,
+                     Context *on_release, Context *on_update,
+                     Context *on_acquire, Context *on_finish);
+
+  // shuffle images when instances are added/removed
+  void add_instances(const std::vector<std::string> &instance_ids,
+                     std::set<std::string> *remap_global_image_ids);
+  void remove_instances(const std::vector<std::string> &instance_ids,
+                        std::set<std::string> *remap_global_image_ids);
+
+  void start_next_action(const std::string &global_image_id);
+  bool finish_action(const std::string &global_image_id, int r);
+
+  static const std::string UNMAPPED_INSTANCE_ID;
+
+private:
+  typedef std::list<Action> Actions;
+
+  struct ActionState {
+    Actions actions;                                                          // list of pending actions
+
+    StateTransition::State current_state = StateTransition::STATE_UNASSIGNED; // current state
+    boost::optional<StateTransition::State> last_idle_state;                  // last successfull idle
+                                                                              // state transition
+
+    StateTransition::Transition transition;                                   // (cached) next transition
+
+    utime_t map_time;                                                         // (re)mapped time
+  };
+
+  librados::IoCtx &m_ioctx;
+  std::map<std::string, ActionState> m_actions;
+  std::set<std::string> m_dead_instances;
+
+  bool is_idle_state(StateTransition::State state) {
+    if (state == StateTransition::STATE_ASSOCIATED ||
+        state == StateTransition::STATE_DISASSOCIATED) {
+      return true;
+    }
+
+    return false;
+  }
+
+  // generate image map time based on action type
+  utime_t generate_image_map_timestamp(StateTransition::ActionType action_type) {
+    // for a shuffle action (image remap) use current time as
+    // map time, historical time otherwise.
+    utime_t time;
+    if (action_type == StateTransition::ACTION_TYPE_SHUFFLE) {
+      time = ceph_clock_now();
+    } else {
+      time = utime_t(0, 0);
+    }
+
+    return time;
+  }
+
+  bool queue_action(const std::string &global_image_id, const Action &action);
+  bool actions_pending(const std::string &global_image_id, const RWLock &lock);
+  bool remove_pending(const std::string &glolbal_image_id);
+
+  std::string lookup(const std::string &global_image_id, const RWLock &lock);
+  void map(const std::string &global_image_id,
+           const std::string &instance_id, utime_t map_time, const RWLock &lock);
+  void unmap(const std::string &global_image_id, const std::string &instance_id, const RWLock &lock);
+
+  // map an image
+  void map(const std::string &global_image_id, utime_t map_time);
+  // unmap (remove) an image from the map
+  void unmap(const std::string &global_image_id);
+
+  // state transition related..
+  void pre_execute_state_callback(const std::string &global_image_id,
+                                  StateTransition::ActionType action_type, StateTransition::State state);
+  void post_execute_state_callback(const std::string &global_image_id, StateTransition::State state);
+
+  bool is_transition_complete(StateTransition::ActionType action_type, StateTransition::State *state);
+  bool perform_transition(ActionState *action_state, StateTransition::ActionType action_type);
+  bool abort_or_retry(ActionState *action_state);
+
+protected:
+  typedef std::map<std::string, std::set<std::string> > InstanceToImageMap;
+
+  RWLock m_map_lock;        // protects m_map, m_shuffled_timestamp
+  InstanceToImageMap m_map; // instance_id -> global_id map
+
+  bool is_dead_instance(const std::string instance_id) {
+    assert(m_map_lock.is_locked());
+    return m_dead_instances.find(instance_id) != m_dead_instances.end();
+  }
+
+  bool can_shuffle_image(const std::string &global_image_id);
+
+  // map an image (global image id) to an instance
+  virtual std::string do_map(const std::string &global_image_id) = 0;
+
+  // shuffle images when instances are added/removed
+  virtual void do_shuffle_add_instances(const std::vector<std::string> &instance_ids,
+                                        std::set<std::string> *remap_global_image_ids) = 0;
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.cc b/src/tools/rbd_mirror/image_map/SimplePolicy.cc
new file mode 100644 (file)
index 0000000..dc2ec5a
--- /dev/null
@@ -0,0 +1,85 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "SimplePolicy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::SimplePolicy: " << this \
+                           << " " << __func__
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+SimplePolicy::SimplePolicy(librados::IoCtx &ioctx)
+  : Policy(ioctx) {
+}
+
+uint64_t SimplePolicy::calc_images_per_instance(int nr_instances) {
+  assert(nr_instances > 0);
+
+  uint64_t nr_images = 0;
+  for (auto const &it : m_map) {
+    if (!Policy::is_dead_instance(it.first)) {
+      nr_images += it.second.size();
+    }
+  }
+
+  uint64_t images_per_instance = nr_images / nr_instances;
+  if (images_per_instance == 0) {
+    ++images_per_instance;
+  }
+
+  return images_per_instance;
+}
+
+void SimplePolicy::do_shuffle_add_instances(const std::vector<std::string> &instance_ids,
+                                            std::set<std::string> *remap_global_image_ids) {
+  assert(m_map_lock.is_wlocked());
+
+  uint64_t images_per_instance = calc_images_per_instance(m_map.size());
+  dout(5) << ": images per instance=" << images_per_instance << dendl;
+
+  for (auto const &instance : m_map) {
+    if (instance.second.size() <= images_per_instance) {
+      continue;
+    }
+
+    auto it = instance.second.begin();
+    uint64_t cut_off = instance.second.size() - images_per_instance;
+
+    while (it != instance.second.end() && cut_off > 0) {
+      if (Policy::can_shuffle_image(*it)) {
+        --cut_off;
+        remap_global_image_ids->emplace(*it);
+      }
+
+      ++it;
+    }
+  }
+}
+
+std::string SimplePolicy::do_map(const std::string &global_image_id) {
+  assert(m_map_lock.is_wlocked());
+
+  auto min_it = m_map.begin();
+
+  for (auto it = min_it; it != m_map.end(); ++it) {
+    assert(it->second.find(global_image_id) == it->second.end());
+    if (it->second.size() < min_it->second.size() && !Policy::is_dead_instance(it->first)) {
+      min_it = it;
+    }
+  }
+
+  dout(20) << ": global_image_id=" << global_image_id << " maps to instance_id="
+           << min_it->first << dendl;
+  return min_it->first;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.h b/src/tools/rbd_mirror/image_map/SimplePolicy.h
new file mode 100644 (file)
index 0000000..5351194
--- /dev/null
@@ -0,0 +1,38 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+
+#include "Policy.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class SimplePolicy : public Policy {
+public:
+  static SimplePolicy *create(librados::IoCtx &ioctx) {
+    return new SimplePolicy(ioctx);
+  }
+
+protected:
+  using Policy::m_map_lock;
+  using Policy::m_map;
+
+  SimplePolicy(librados::IoCtx &ioctx);
+
+  std::string do_map(const std::string &global_image_id) override;
+
+  void do_shuffle_add_instances(const std::vector<std::string> &instance_ids,
+                                std::set<std::string> *remap_global_image_ids) override;
+
+private:
+  uint64_t calc_images_per_instance(int nr_instances);
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
diff --git a/src/tools/rbd_mirror/image_map/StateTransition.cc b/src/tools/rbd_mirror/image_map/StateTransition.cc
new file mode 100644 (file)
index 0000000..4d7cfd9
--- /dev/null
@@ -0,0 +1,83 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <ostream>
+#include "include/assert.h"
+#include "StateTransition.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+std::ostream &operator<<(std::ostream &os, const StateTransition::ActionType &action_type) {
+  switch (action_type) {
+  case StateTransition::ACTION_TYPE_ADD:
+    os << "ADD_IMAGE";
+    break;
+  case StateTransition::ACTION_TYPE_REMOVE:
+    os << "REMOVE_IMAGE";
+    break;
+  case StateTransition::ACTION_TYPE_SHUFFLE:
+    os << "SHUFFLE_IMAGE";
+    break;
+  default:
+    os << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")";
+  }
+
+  return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const StateTransition::State &state) {
+  switch(state) {
+  case StateTransition::STATE_UNASSIGNED:
+    os << "UNASSIGNED";
+    break;
+  case StateTransition::STATE_ASSOCIATED:
+    os << "ASSOCIATED";
+    break;
+  case StateTransition::STATE_DISASSOCIATED:
+    os << "DISASSOCIATED";
+    break;
+  case StateTransition::STATE_UPDATE_MAPPING:
+    os << "UPDATE_MAPPING";
+    break;
+  case StateTransition::STATE_REMOVE_MAPPING:
+    os << "REMOVE_MAPPING";
+    break;
+  case StateTransition::STATE_COMPLETE:
+    os << "COMPLETE";
+    break;
+  }
+
+  return os;
+}
+
+const StateTransition::TransitionTable StateTransition::transition_table[] = {
+  // action_type         current_state                   Transition
+  // -------------------------------------------------------------------------------
+  ACTION_TYPE_ADD,     STATE_UNASSIGNED,      Transition(STATE_UPDATE_MAPPING, true),
+  ACTION_TYPE_ADD,     STATE_ASSOCIATED,      Transition(STATE_ASSOCIATED,     false),
+  ACTION_TYPE_ADD,     STATE_DISASSOCIATED,   Transition(STATE_UPDATE_MAPPING, true),
+  ACTION_TYPE_ADD,     STATE_UPDATE_MAPPING,  Transition(STATE_ASSOCIATED,     false),
+
+  ACTION_TYPE_REMOVE,  STATE_ASSOCIATED,      Transition(STATE_DISASSOCIATED,  false),
+  ACTION_TYPE_REMOVE,  STATE_DISASSOCIATED,   Transition(STATE_REMOVE_MAPPING, true),
+
+  ACTION_TYPE_SHUFFLE, STATE_ASSOCIATED,      Transition(STATE_DISASSOCIATED,  false),
+  ACTION_TYPE_SHUFFLE, STATE_DISASSOCIATED,   Transition(STATE_UPDATE_MAPPING, true),
+  ACTION_TYPE_SHUFFLE, STATE_UPDATE_MAPPING,  Transition(STATE_ASSOCIATED,     false),
+};
+
+const StateTransition::Transition &StateTransition::transit(ActionType action_type, State state) {
+  for (auto const &entry : transition_table) {
+    if (entry.action_type == action_type && entry.current_state == state) {
+      return entry.transition;
+    }
+  }
+
+  assert(false);
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/StateTransition.h b/src/tools/rbd_mirror/image_map/StateTransition.h
new file mode 100644 (file)
index 0000000..c9f6ee5
--- /dev/null
@@ -0,0 +1,63 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class StateTransition {
+public:
+  enum ActionType {
+    ACTION_TYPE_ADD = 0,
+    ACTION_TYPE_REMOVE,
+    ACTION_TYPE_SHUFFLE,
+  };
+
+  enum State {
+    STATE_UNASSIGNED = 0, // starting (initial) state
+    STATE_ASSOCIATED,     // acquire image
+    STATE_DISASSOCIATED,  // release image
+    STATE_UPDATE_MAPPING, // update on-disk map
+    STATE_REMOVE_MAPPING, // remove on-disk map
+    STATE_COMPLETE,       // special state to invoke completion callback
+  };
+
+  struct Transition {
+    State next_state;
+    bool retry_on_error;
+
+    Transition() {
+    }
+    Transition(State next_state, bool retry_on_error)
+      : next_state(next_state),
+        retry_on_error(retry_on_error) {
+    }
+  };
+
+  static const Transition &transit(ActionType action_type, State state);
+
+private:
+  struct TransitionTable {
+    // in: action + current_state
+    ActionType action_type;
+    State current_state;
+
+    // out: Transition
+    Transition transition;
+  };
+
+  // image transition table
+  static const TransitionTable transition_table[];
+};
+
+std::ostream &operator<<(std::ostream &os, const StateTransition::ActionType &action_type);
+std::ostream &operator<<(std::ostream &os, const StateTransition::State &state);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H