Option("rbd_mirror_leader_max_acquire_attempts_before_break", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(3)
.set_description("number of failed attempts to acquire lock after missing heartbeats before breaking lock"),
+
+ Option("rbd_mirror_image_policy_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+ .set_default("simple")
+ .set_enum_allowed({"simple"})
+ .set_description("policy type for mapping images to instances"),
+
+ Option("rbd_mirror_image_policy_migration_throttle", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+ .set_default(300)
+ .set_description("number of seconds after which an image can be reshuffled (migrated) again"),
});
}
ServiceDaemon.cc
Threads.cc
types.cc
+ image_map/Action.cc
image_map/LoadRequest.cc
+ image_map/Policy.cc
+ image_map/SimplePolicy.cc
+ image_map/StateTransition.cc
image_map/UpdateRequest.cc
image_replayer/BootstrapRequest.cc
image_replayer/CloseImageRequest.cc
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <ostream>
+#include "include/Context.h"
+#include "Action.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+std::ostream &operator<<(std::ostream &os, const Action &action) {
+ os << "[action_type=" << action.get_action_type() << "]";
+ return os;
+}
+
+Action::Action(StateTransition::ActionType action_type)
+ : action_type(action_type) {
+}
+
+Action Action::create_add_action(Context *on_update, Context *on_acquire, Context *on_finish) {
+ Action action(StateTransition::ACTION_TYPE_ADD);
+ action.context_map.emplace(StateTransition::STATE_UPDATE_MAPPING, on_update);
+ action.context_map.emplace(StateTransition::STATE_ASSOCIATED, on_acquire);
+ action.context_map.emplace(StateTransition::STATE_COMPLETE, on_finish);
+
+ return action;
+}
+
+Action Action::create_remove_action(Context *on_release, Context *on_remove, Context *on_finish) {
+ Action action(StateTransition::ACTION_TYPE_REMOVE);
+ action.context_map.emplace(StateTransition::STATE_DISASSOCIATED, on_release);
+ action.context_map.emplace(StateTransition::STATE_REMOVE_MAPPING, on_remove);
+ action.context_map.emplace(StateTransition::STATE_COMPLETE, on_finish);
+
+ return action;
+}
+
+Action Action::create_shuffle_action(Context *on_release, Context *on_update, Context *on_acquire,
+ Context *on_finish) {
+ Action action(StateTransition::ACTION_TYPE_SHUFFLE);
+ action.context_map.emplace(StateTransition::STATE_DISASSOCIATED, on_release);
+ action.context_map.emplace(StateTransition::STATE_UPDATE_MAPPING, on_update);
+ action.context_map.emplace(StateTransition::STATE_ASSOCIATED, on_acquire);
+ action.context_map.emplace(StateTransition::STATE_COMPLETE, on_finish);
+
+ return action;
+}
+
+StateTransition::ActionType Action::get_action_type() const {
+ return action_type;
+}
+
+void Action::execute_state_callback(StateTransition::State state) {
+ auto it = context_map.find(state);
+ if (it != context_map.end() && it->second != nullptr) {
+ it->second->complete(0);
+ }
+}
+
+void Action::execute_completion_callback(int r) {
+ auto it = context_map.find(StateTransition::STATE_COMPLETE);
+ assert(it != context_map.end());
+
+ Context *on_finish = nullptr;
+ std::swap(it->second, on_finish); // just called once so its swap'd
+
+ if (on_finish != nullptr) {
+ on_finish->complete(r);
+ }
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_ACTION_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_ACTION_H
+
+#include <map>
+#include "StateTransition.h"
+
+class Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+struct Action {
+public:
+ static Action create_add_action(Context *on_update, Context *on_acquire, Context *on_finish);
+ static Action create_remove_action(Context *on_release, Context *on_remove, Context *on_finish);
+ static Action create_shuffle_action(Context *on_release, Context *on_update, Context *on_acquire,
+ Context *on_finish);
+
+ void execute_state_callback(StateTransition::State state);
+ void execute_completion_callback(int r);
+
+ StateTransition::ActionType get_action_type() const;
+
+private:
+ Action(StateTransition::ActionType action_type);
+
+ StateTransition::ActionType action_type; // action type for this action
+ std::map<StateTransition::State, Context *> context_map; // map sub action type to context callback
+};
+
+std::ostream &operator<<(std::ostream &os, const Action &action);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_ACTION_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "Policy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << this \
+ << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+using librbd::util::unique_lock_name;
+
+const std::string Policy::UNMAPPED_INSTANCE_ID("");
+
+Policy::Policy(librados::IoCtx &ioctx)
+ : m_ioctx(ioctx),
+ m_map_lock(unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", this)) {
+
+ // map should at least have once instance
+ std::string instance_id = stringify(ioctx.get_instance_id());
+ add_instances({instance_id}, nullptr);
+}
+
+void Policy::init(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) {
+ dout(20) << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+
+ for (auto const &it : image_mapping) {
+ map(it.first, it.second.instance_id, utime_t(0, 0), m_map_lock);
+ }
+}
+
+std::string Policy::lookup(const std::string &global_image_id) {
+ dout(20) << ": global_image_id=" << global_image_id << dendl;
+
+ RWLock::RLocker map_lock(m_map_lock);
+ return lookup(global_image_id, m_map_lock);
+}
+
+bool Policy::add_image(const std::string &global_image_id,
+ Context *on_update, Context *on_acquire, Context *on_finish) {
+ dout(20) << ": global_image_id=" << global_image_id << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+
+ auto it = m_actions.find(global_image_id);
+ if (it == m_actions.end()) {
+ m_actions.emplace(global_image_id, ActionState());
+ }
+
+ Action action = Action::create_add_action(on_update, on_acquire, on_finish);
+ return queue_action(global_image_id, action);
+}
+
+bool Policy::remove_image(const std::string &global_image_id,
+ Context *on_release, Context *on_remove, Context *on_finish) {
+ dout(20) << ": global_image_id=" << global_image_id << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+
+ on_finish = new FunctionContext([this, global_image_id, on_finish](int r) {
+ {
+ RWLock::WLocker map_lock(m_map_lock);
+ if (!actions_pending(global_image_id, m_map_lock)) {
+ m_actions.erase(global_image_id);
+ }
+ }
+
+ if (on_finish != nullptr) {
+ on_finish->complete(r);
+ }
+ });
+
+ Action action = Action::create_remove_action(on_release, on_remove, on_finish);
+ return queue_action(global_image_id, action);
+}
+
+bool Policy::shuffle_image(const std::string &global_image_id,
+ Context *on_release, Context *on_update,
+ Context *on_acquire, Context *on_finish) {
+ dout(20) << ": global_image_id=" << global_image_id << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+
+ Action action = Action::create_shuffle_action(on_release, on_update, on_acquire, on_finish);
+ return queue_action(global_image_id, action);
+}
+
+void Policy::add_instances(const std::vector<std::string> &instance_ids,
+ std::set<std::string> *remap_global_image_ids) {
+ dout(20) << ": adding " << instance_ids.size() << " instance(s)" << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+
+ for (auto const &instance : instance_ids) {
+ dout(10) << ": adding instance_id=" << instance << dendl;
+ m_map.emplace(instance, std::set<std::string>{});
+ }
+
+ if (remap_global_image_ids != nullptr) {
+ do_shuffle_add_instances(instance_ids, remap_global_image_ids);
+ }
+}
+
+void Policy::remove_instances(const std::vector<std::string> &instance_ids,
+ std::set<std::string> *remap_global_image_ids) {
+ dout(20) << ": removing " << instance_ids.size() << " instance(s)" << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+
+ for (auto const &instance : instance_ids) {
+ dout(10) << ": removing instance_id=" << instance << dendl;
+ for (auto const &global_image_id : m_map[instance]) {
+ if (!remove_pending(global_image_id)) {
+ remap_global_image_ids->emplace(global_image_id);
+ }
+ }
+ }
+
+ m_dead_instances.insert(instance_ids.begin(), instance_ids.end());
+}
+
+// new actions are always started from a stable (idle) state since
+// actions either complete successfully ending up in an idle state
+// or get aborted due to peer being blacklisted.
+void Policy::start_next_action(const std::string &global_image_id) {
+ RWLock::WLocker map_lock(m_map_lock);
+
+ auto it = m_actions.find(global_image_id);
+ assert(it != m_actions.end());
+ assert(!it->second.actions.empty());
+
+ ActionState &action_state = it->second;
+ Action &action = action_state.actions.front();
+
+ StateTransition::ActionType action_type = action.get_action_type();
+ action_state.transition = StateTransition::transit(action_type, action_state.current_state);
+
+ dout(10) << ": global_image_id=" << global_image_id << ", action=" << action
+ << ", currrent_state=" << action_state.current_state << ", next_state="
+ << action_state.transition.next_state << dendl;
+
+ // invoke state context callback
+ pre_execute_state_callback(global_image_id, action_type, action_state.transition.next_state);
+ m_map_lock.put_write();
+ action.execute_state_callback(action_state.transition.next_state);
+ m_map_lock.get_write();
+}
+
+bool Policy::finish_action(const std::string &global_image_id, int r) {
+ RWLock::WLocker map_lock(m_map_lock);
+
+ dout(10) << ": global_image_id=" << global_image_id << ", r=" << r
+ << dendl;
+
+ auto it = m_actions.find(global_image_id);
+ assert(it != m_actions.end());
+ assert(!it->second.actions.empty());
+
+ ActionState &action_state = it->second;
+ Action &action = action_state.actions.front();
+
+ bool complete;
+ if (r == 0) {
+ post_execute_state_callback(global_image_id, action_state.transition.next_state);
+ complete = perform_transition(&action_state, action.get_action_type());
+ } else {
+ complete = abort_or_retry(&action_state);
+ }
+
+ if (complete) {
+ dout(10) << ": completing action=" << action << dendl;
+
+ m_map_lock.put_write();
+ action.execute_completion_callback(r);
+ m_map_lock.get_write();
+
+ action_state.last_idle_state.reset();
+ action_state.actions.pop_front();
+ }
+
+ return !action_state.actions.empty();
+}
+
+bool Policy::queue_action(const std::string &global_image_id, const Action &action) {
+ dout(20) << ": global_image_id=" << global_image_id << ", action=" << action
+ << dendl;
+ assert(m_map_lock.is_wlocked());
+
+ auto it = m_actions.find(global_image_id);
+ assert(it != m_actions.end());
+
+ it->second.actions.push_back(action);
+ return it->second.actions.size() == 1;
+}
+
+bool Policy::is_transition_complete(StateTransition::ActionType action_type, StateTransition::State *state) {
+ assert(m_map_lock.is_locked());
+
+ dout(10) << ": action_type=" << action_type << ", state=" << *state << dendl;
+
+ bool complete = false;
+ switch (action_type) {
+ case StateTransition::ACTION_TYPE_ADD:
+ case StateTransition::ACTION_TYPE_SHUFFLE:
+ complete = *state == StateTransition::STATE_ASSOCIATED;
+ break;
+ case StateTransition::ACTION_TYPE_REMOVE:
+ if (*state == StateTransition::STATE_REMOVE_MAPPING) {
+ complete = true;
+ *state = StateTransition::STATE_UNASSIGNED;
+ }
+ break;
+ default:
+ derr << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")" << dendl;
+ assert(false);
+ }
+
+ return complete;
+}
+
+bool Policy::perform_transition(ActionState *action_state, StateTransition::ActionType action_type) {
+ dout(20) << dendl;
+ assert(m_map_lock.is_wlocked());
+
+ StateTransition::State state;
+
+ bool complete = is_transition_complete(action_type, &state);
+ dout(10) << ": advancing state: " << action_state->current_state << " -> "
+ << state << dendl;
+
+ action_state->current_state = state;
+ if (is_idle_state(state)) {
+ action_state->last_idle_state = state;
+ dout(10) << ": tranisition reached idle state=" << state << dendl;
+ }
+
+ return complete;
+}
+
+bool Policy::abort_or_retry(ActionState *action_state) {
+ dout(20) << dendl;
+ assert(m_map_lock.is_wlocked());
+
+ bool complete = !action_state->transition.retry_on_error;
+ if (complete && action_state->last_idle_state) {
+ dout(10) << ": using last idle state=" << action_state->last_idle_state.get()
+ << " as current state" << dendl;
+ action_state->current_state = action_state->last_idle_state.get();
+ }
+
+ return complete;
+}
+
+void Policy::pre_execute_state_callback(const std::string &global_image_id,
+ StateTransition::ActionType action_type,
+ StateTransition::State state) {
+ assert(m_map_lock.is_wlocked());
+
+ dout(10) << ": global_image_id=" << global_image_id << ", action_type="
+ << action_type << ", state=" << state << dendl;
+
+ utime_t map_time = generate_image_map_timestamp(action_type);
+ switch (state) {
+ case StateTransition::STATE_UPDATE_MAPPING:
+ map(global_image_id, map_time);
+ break;
+ case StateTransition::STATE_ASSOCIATED:
+ case StateTransition::STATE_DISASSOCIATED:
+ case StateTransition::STATE_REMOVE_MAPPING:
+ break;
+ case StateTransition::STATE_UNASSIGNED:
+ default:
+ assert(false);
+ }
+}
+
+void Policy::post_execute_state_callback(const std::string &global_image_id, StateTransition::State state) {
+ assert(m_map_lock.is_wlocked());
+
+ dout(10) << ": global_image_id=" << global_image_id << ", state=" << state << dendl;
+
+ switch (state) {
+ case StateTransition::STATE_DISASSOCIATED:
+ unmap(global_image_id);
+ break;
+ case StateTransition::STATE_ASSOCIATED:
+ case StateTransition::STATE_UPDATE_MAPPING:
+ case StateTransition::STATE_REMOVE_MAPPING:
+ break;
+ case StateTransition::STATE_UNASSIGNED:
+ default:
+ assert(false);
+ }
+}
+
+bool Policy::actions_pending(const std::string &global_image_id, const RWLock &lock) {
+ dout(20) << ": global_image_id=" << global_image_id << dendl;
+ assert(m_map_lock.is_locked());
+
+ auto it = m_actions.find(global_image_id);
+ assert(it != m_actions.end());
+
+ return !it->second.actions.empty();
+}
+
+bool Policy::remove_pending(const std::string &global_image_id) {
+ dout(20) << ": global_image_id=" << global_image_id << dendl;
+ assert(m_map_lock.is_locked());
+
+ auto it = m_actions.find(global_image_id);
+ assert(it != m_actions.end());
+
+ auto r_it = std::find_if(it->second.actions.rbegin(), it->second.actions.rend(),
+ [](const Action &action) {
+ return (action.get_action_type() == StateTransition::ACTION_TYPE_REMOVE);
+ });
+ return r_it != it->second.actions.rend();
+}
+
+std::string Policy::lookup(const std::string &global_image_id, const RWLock &lock) {
+ assert(m_map_lock.is_locked());
+
+ for (auto it = m_map.begin(); it != m_map.end(); ++it) {
+ if (it->second.find(global_image_id) != it->second.end()) {
+ return it->first;
+ }
+ }
+
+ return UNMAPPED_INSTANCE_ID;
+}
+
+void Policy::map(const std::string &global_image_id, const std::string &instance_id,
+ utime_t map_time, const RWLock &lock) {
+ assert(m_map_lock.is_wlocked());
+
+ auto ins = m_map[instance_id].emplace(global_image_id);
+ assert(ins.second);
+
+ auto it = m_actions.find(global_image_id);
+ assert(it != m_actions.end());
+ it->second.map_time = map_time;
+}
+
+void Policy::unmap(const std::string &global_image_id, const std::string &instance_id,
+ const RWLock &lock) {
+ assert(m_map_lock.is_wlocked());
+
+ m_map[instance_id].erase(global_image_id);
+
+ if (is_dead_instance(instance_id) && m_map[instance_id].empty()) {
+ dout(10) << ": removing dead instance_id=" << instance_id << dendl;
+ m_map.erase(instance_id);
+ m_dead_instances.erase(instance_id);
+ }
+}
+
+void Policy::map(const std::string &global_image_id, utime_t map_time) {
+ dout(20) << ": global_image_id=" << global_image_id << dendl;
+ assert(m_map_lock.is_wlocked());
+
+ std::string instance_id = lookup(global_image_id, m_map_lock);
+ if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) {
+ return;
+ }
+ if (is_dead_instance(instance_id)) {
+ unmap(global_image_id, instance_id, m_map_lock);
+ }
+
+ instance_id = do_map(global_image_id);
+ map(global_image_id, instance_id, map_time, m_map_lock);
+}
+
+void Policy::unmap(const std::string &global_image_id) {
+ dout(20) << ": global_image_id=" << global_image_id << dendl;
+ assert(m_map_lock.is_wlocked());
+
+ std::string instance_id = lookup(global_image_id, m_map_lock);
+ if (instance_id == UNMAPPED_INSTANCE_ID) {
+ return;
+ }
+
+ unmap(global_image_id, instance_id, m_map_lock);
+}
+
+bool Policy::can_shuffle_image(const std::string &global_image_id) {
+ dout(20) << ": global_image_id=" << global_image_id << dendl;
+ assert(m_map_lock.is_locked());
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ int migration_throttle = cct->_conf->get_val<int64_t>("rbd_mirror_image_policy_migration_throttle");
+
+ auto it = m_actions.find(global_image_id);
+ assert(it != m_actions.end());
+
+ utime_t last_shuffled_time = it->second.map_time;
+ dout(10) << ": migration_throttle=" << migration_throttle << ", last_shuffled_time="
+ << last_shuffled_time << dendl;
+
+ utime_t now = ceph_clock_now();
+ return !actions_pending(global_image_id, m_map_lock) &&
+ !(migration_throttle > 0 && (now - last_shuffled_time < migration_throttle));
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+
+#include <map>
+#include <tuple>
+#include <boost/optional.hpp>
+
+#include "common/RWLock.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+#include "Action.h"
+
+class Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class Policy {
+public:
+ Policy(librados::IoCtx &ioctx);
+
+ virtual ~Policy() {
+ }
+
+ // init -- called during initialization
+ void init(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping);
+
+ // lookup an image from the map
+ std::string lookup(const std::string &global_image_id);
+
+ // add, remove, shuffle
+ bool add_image(const std::string &global_image_id,
+ Context *on_update, Context *on_acquire, Context *on_finish);
+ bool remove_image(const std::string &global_image_id,
+ Context *on_release, Context *on_remove, Context *on_finish);
+ bool shuffle_image(const std::string &global_image_id,
+ Context *on_release, Context *on_update,
+ Context *on_acquire, Context *on_finish);
+
+ // shuffle images when instances are added/removed
+ void add_instances(const std::vector<std::string> &instance_ids,
+ std::set<std::string> *remap_global_image_ids);
+ void remove_instances(const std::vector<std::string> &instance_ids,
+ std::set<std::string> *remap_global_image_ids);
+
+ void start_next_action(const std::string &global_image_id);
+ bool finish_action(const std::string &global_image_id, int r);
+
+ static const std::string UNMAPPED_INSTANCE_ID;
+
+private:
+ typedef std::list<Action> Actions;
+
+ struct ActionState {
+ Actions actions; // list of pending actions
+
+ StateTransition::State current_state = StateTransition::STATE_UNASSIGNED; // current state
+ boost::optional<StateTransition::State> last_idle_state; // last successfull idle
+ // state transition
+
+ StateTransition::Transition transition; // (cached) next transition
+
+ utime_t map_time; // (re)mapped time
+ };
+
+ librados::IoCtx &m_ioctx;
+ std::map<std::string, ActionState> m_actions;
+ std::set<std::string> m_dead_instances;
+
+ bool is_idle_state(StateTransition::State state) {
+ if (state == StateTransition::STATE_ASSOCIATED ||
+ state == StateTransition::STATE_DISASSOCIATED) {
+ return true;
+ }
+
+ return false;
+ }
+
+ // generate image map time based on action type
+ utime_t generate_image_map_timestamp(StateTransition::ActionType action_type) {
+ // for a shuffle action (image remap) use current time as
+ // map time, historical time otherwise.
+ utime_t time;
+ if (action_type == StateTransition::ACTION_TYPE_SHUFFLE) {
+ time = ceph_clock_now();
+ } else {
+ time = utime_t(0, 0);
+ }
+
+ return time;
+ }
+
+ bool queue_action(const std::string &global_image_id, const Action &action);
+ bool actions_pending(const std::string &global_image_id, const RWLock &lock);
+ bool remove_pending(const std::string &glolbal_image_id);
+
+ std::string lookup(const std::string &global_image_id, const RWLock &lock);
+ void map(const std::string &global_image_id,
+ const std::string &instance_id, utime_t map_time, const RWLock &lock);
+ void unmap(const std::string &global_image_id, const std::string &instance_id, const RWLock &lock);
+
+ // map an image
+ void map(const std::string &global_image_id, utime_t map_time);
+ // unmap (remove) an image from the map
+ void unmap(const std::string &global_image_id);
+
+ // state transition related..
+ void pre_execute_state_callback(const std::string &global_image_id,
+ StateTransition::ActionType action_type, StateTransition::State state);
+ void post_execute_state_callback(const std::string &global_image_id, StateTransition::State state);
+
+ bool is_transition_complete(StateTransition::ActionType action_type, StateTransition::State *state);
+ bool perform_transition(ActionState *action_state, StateTransition::ActionType action_type);
+ bool abort_or_retry(ActionState *action_state);
+
+protected:
+ typedef std::map<std::string, std::set<std::string> > InstanceToImageMap;
+
+ RWLock m_map_lock; // protects m_map, m_shuffled_timestamp
+ InstanceToImageMap m_map; // instance_id -> global_id map
+
+ bool is_dead_instance(const std::string instance_id) {
+ assert(m_map_lock.is_locked());
+ return m_dead_instances.find(instance_id) != m_dead_instances.end();
+ }
+
+ bool can_shuffle_image(const std::string &global_image_id);
+
+ // map an image (global image id) to an instance
+ virtual std::string do_map(const std::string &global_image_id) = 0;
+
+ // shuffle images when instances are added/removed
+ virtual void do_shuffle_add_instances(const std::vector<std::string> &instance_ids,
+ std::set<std::string> *remap_global_image_ids) = 0;
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "SimplePolicy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::SimplePolicy: " << this \
+ << " " << __func__
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+SimplePolicy::SimplePolicy(librados::IoCtx &ioctx)
+ : Policy(ioctx) {
+}
+
+uint64_t SimplePolicy::calc_images_per_instance(int nr_instances) {
+ assert(nr_instances > 0);
+
+ uint64_t nr_images = 0;
+ for (auto const &it : m_map) {
+ if (!Policy::is_dead_instance(it.first)) {
+ nr_images += it.second.size();
+ }
+ }
+
+ uint64_t images_per_instance = nr_images / nr_instances;
+ if (images_per_instance == 0) {
+ ++images_per_instance;
+ }
+
+ return images_per_instance;
+}
+
+void SimplePolicy::do_shuffle_add_instances(const std::vector<std::string> &instance_ids,
+ std::set<std::string> *remap_global_image_ids) {
+ assert(m_map_lock.is_wlocked());
+
+ uint64_t images_per_instance = calc_images_per_instance(m_map.size());
+ dout(5) << ": images per instance=" << images_per_instance << dendl;
+
+ for (auto const &instance : m_map) {
+ if (instance.second.size() <= images_per_instance) {
+ continue;
+ }
+
+ auto it = instance.second.begin();
+ uint64_t cut_off = instance.second.size() - images_per_instance;
+
+ while (it != instance.second.end() && cut_off > 0) {
+ if (Policy::can_shuffle_image(*it)) {
+ --cut_off;
+ remap_global_image_ids->emplace(*it);
+ }
+
+ ++it;
+ }
+ }
+}
+
+std::string SimplePolicy::do_map(const std::string &global_image_id) {
+ assert(m_map_lock.is_wlocked());
+
+ auto min_it = m_map.begin();
+
+ for (auto it = min_it; it != m_map.end(); ++it) {
+ assert(it->second.find(global_image_id) == it->second.end());
+ if (it->second.size() < min_it->second.size() && !Policy::is_dead_instance(it->first)) {
+ min_it = it;
+ }
+ }
+
+ dout(20) << ": global_image_id=" << global_image_id << " maps to instance_id="
+ << min_it->first << dendl;
+ return min_it->first;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+
+#include "Policy.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class SimplePolicy : public Policy {
+public:
+ static SimplePolicy *create(librados::IoCtx &ioctx) {
+ return new SimplePolicy(ioctx);
+ }
+
+protected:
+ using Policy::m_map_lock;
+ using Policy::m_map;
+
+ SimplePolicy(librados::IoCtx &ioctx);
+
+ std::string do_map(const std::string &global_image_id) override;
+
+ void do_shuffle_add_instances(const std::vector<std::string> &instance_ids,
+ std::set<std::string> *remap_global_image_ids) override;
+
+private:
+ uint64_t calc_images_per_instance(int nr_instances);
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <ostream>
+#include "include/assert.h"
+#include "StateTransition.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+std::ostream &operator<<(std::ostream &os, const StateTransition::ActionType &action_type) {
+ switch (action_type) {
+ case StateTransition::ACTION_TYPE_ADD:
+ os << "ADD_IMAGE";
+ break;
+ case StateTransition::ACTION_TYPE_REMOVE:
+ os << "REMOVE_IMAGE";
+ break;
+ case StateTransition::ACTION_TYPE_SHUFFLE:
+ os << "SHUFFLE_IMAGE";
+ break;
+ default:
+ os << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")";
+ }
+
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const StateTransition::State &state) {
+ switch(state) {
+ case StateTransition::STATE_UNASSIGNED:
+ os << "UNASSIGNED";
+ break;
+ case StateTransition::STATE_ASSOCIATED:
+ os << "ASSOCIATED";
+ break;
+ case StateTransition::STATE_DISASSOCIATED:
+ os << "DISASSOCIATED";
+ break;
+ case StateTransition::STATE_UPDATE_MAPPING:
+ os << "UPDATE_MAPPING";
+ break;
+ case StateTransition::STATE_REMOVE_MAPPING:
+ os << "REMOVE_MAPPING";
+ break;
+ case StateTransition::STATE_COMPLETE:
+ os << "COMPLETE";
+ break;
+ }
+
+ return os;
+}
+
+const StateTransition::TransitionTable StateTransition::transition_table[] = {
+ // action_type current_state Transition
+ // -------------------------------------------------------------------------------
+ ACTION_TYPE_ADD, STATE_UNASSIGNED, Transition(STATE_UPDATE_MAPPING, true),
+ ACTION_TYPE_ADD, STATE_ASSOCIATED, Transition(STATE_ASSOCIATED, false),
+ ACTION_TYPE_ADD, STATE_DISASSOCIATED, Transition(STATE_UPDATE_MAPPING, true),
+ ACTION_TYPE_ADD, STATE_UPDATE_MAPPING, Transition(STATE_ASSOCIATED, false),
+
+ ACTION_TYPE_REMOVE, STATE_ASSOCIATED, Transition(STATE_DISASSOCIATED, false),
+ ACTION_TYPE_REMOVE, STATE_DISASSOCIATED, Transition(STATE_REMOVE_MAPPING, true),
+
+ ACTION_TYPE_SHUFFLE, STATE_ASSOCIATED, Transition(STATE_DISASSOCIATED, false),
+ ACTION_TYPE_SHUFFLE, STATE_DISASSOCIATED, Transition(STATE_UPDATE_MAPPING, true),
+ ACTION_TYPE_SHUFFLE, STATE_UPDATE_MAPPING, Transition(STATE_ASSOCIATED, false),
+};
+
+const StateTransition::Transition &StateTransition::transit(ActionType action_type, State state) {
+ for (auto const &entry : transition_table) {
+ if (entry.action_type == action_type && entry.current_state == state) {
+ return entry.transition;
+ }
+ }
+
+ assert(false);
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class StateTransition {
+public:
+ enum ActionType {
+ ACTION_TYPE_ADD = 0,
+ ACTION_TYPE_REMOVE,
+ ACTION_TYPE_SHUFFLE,
+ };
+
+ enum State {
+ STATE_UNASSIGNED = 0, // starting (initial) state
+ STATE_ASSOCIATED, // acquire image
+ STATE_DISASSOCIATED, // release image
+ STATE_UPDATE_MAPPING, // update on-disk map
+ STATE_REMOVE_MAPPING, // remove on-disk map
+ STATE_COMPLETE, // special state to invoke completion callback
+ };
+
+ struct Transition {
+ State next_state;
+ bool retry_on_error;
+
+ Transition() {
+ }
+ Transition(State next_state, bool retry_on_error)
+ : next_state(next_state),
+ retry_on_error(retry_on_error) {
+ }
+ };
+
+ static const Transition &transit(ActionType action_type, State state);
+
+private:
+ struct TransitionTable {
+ // in: action + current_state
+ ActionType action_type;
+ State current_state;
+
+ // out: Transition
+ Transition transition;
+ };
+
+ // image transition table
+ static const TransitionTable transition_table[];
+};
+
+std::ostream &operator<<(std::ostream &os, const StateTransition::ActionType &action_type);
+std::ostream &operator<<(std::ostream &os, const StateTransition::State &state);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H