From: John Spray Date: Thu, 24 Mar 2016 13:23:26 +0000 (+0000) Subject: mds: simplify standby/standby-replay logic X-Git-Tag: v11.0.0~711^2~3 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=2a2a56b2d0e9e4224cae27f80d990b787e209bed;p=ceph-ci.git mds: simplify standby/standby-replay logic This used to use an arcane set of constants in standby_for_rank, combined with daemons sometimes sending requests to enter state STANDBY_REPLAY. Simplify this so that there is only one place we put daemons into standby replay, and that is in tick->maybe_promote_staandby. There is a behavioural change in here, which is that we used to sometimes promote standbys to be standby-replay, even if they didn't have "standby replay = true", when they did have a standby_for_rank or standby_for_name set. I'm calling that a bug, and making it so that daemons will only go into standby-replay if "standby replay = true" is set. Signed-off-by: John Spray --- diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc index 9a07b915974..06020af1053 100644 --- a/src/mds/Beacon.cc +++ b/src/mds/Beacon.cc @@ -34,10 +34,10 @@ Beacon::Beacon(CephContext *cct_, MonClient *monc_, std::string name_) : Dispatcher(cct_), lock("Beacon"), monc(monc_), timer(g_ceph_context, lock), - name(name_), standby_for_rank(MDSMap::MDS_NO_STANDBY_PREF), - standby_for_fscid(FS_CLUSTER_ID_NONE), awaiting_seq(-1) + name(name_), standby_for_rank(MDS_RANK_NONE), + standby_for_fscid(FS_CLUSTER_ID_NONE), want_state(MDSMap::STATE_BOOT), + awaiting_seq(-1) { - want_state = MDSMap::STATE_NULL; last_seq = 0; sender = NULL; was_laggy = false; @@ -51,18 +51,16 @@ Beacon::~Beacon() } -void Beacon::init(MDSMap const *mdsmap, MDSMap::DaemonState want_state_, - mds_rank_t standby_rank_, std::string const & standby_name_, - fs_cluster_id_t standby_fscid_) +void Beacon::init(MDSMap const *mdsmap) { Mutex::Locker l(lock); assert(mdsmap != NULL); - want_state = want_state_; _notify_mdsmap(mdsmap); - standby_for_rank = standby_rank_; - standby_for_name = standby_name_; - standby_for_fscid = standby_fscid_; + standby_for_rank = mds_rank_t(g_conf->mds_standby_for_rank); + standby_for_name = g_conf->mds_standby_for_name; + standby_for_fscid = fs_cluster_id_t(g_conf->mds_standby_for_fscid); + standby_replay = g_conf->mds_standby_replay; // Spawn threads and start messaging timer.init(); @@ -210,6 +208,7 @@ void Beacon::_send() beacon->set_standby_for_rank(standby_for_rank); beacon->set_standby_for_name(standby_for_name); beacon->set_standby_for_fscid(standby_for_fscid); + beacon->set_standby_replay(standby_replay); beacon->set_health(health); beacon->set_compat(compat); // piggyback the sys info on beacon msg diff --git a/src/mds/Beacon.h b/src/mds/Beacon.h index e8368cfe471..1a29f24f6a7 100644 --- a/src/mds/Beacon.h +++ b/src/mds/Beacon.h @@ -51,6 +51,7 @@ class Beacon : public Dispatcher mds_rank_t standby_for_rank; std::string standby_for_name; fs_cluster_id_t standby_for_fscid; + bool standby_replay; MDSMap::DaemonState want_state; // Internal beacon state @@ -86,9 +87,7 @@ public: Beacon(CephContext *cct_, MonClient *monc_, std::string name); ~Beacon(); - void init(MDSMap const *mdsmap, MDSMap::DaemonState want_state_, - mds_rank_t standby_rank_, std::string const &standby_name_, - fs_cluster_id_t standby_fscid_); + void init(MDSMap const *mdsmap); void shutdown(); bool ms_dispatch(Message *m); diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index a20bb09b8ce..c704c20e687 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -553,10 +553,10 @@ mds_gid_t FSMap::find_unused(fs_cluster_id_t fscid, info.standby_for_fscid != fscid) continue; - if ((info.standby_for_rank == MDSMap::MDS_NO_STANDBY_PREF || - info.standby_for_rank == MDSMap::MDS_MATCHED_ACTIVE || - (info.standby_for_rank == MDSMap::MDS_STANDBY_ANY - && force_standby_active))) { + // To be considered 'unused' a daemon must either not + // be selected for standby-replay or the force_standby_active + // setting must be enabled to use replay daemons anyway. + if (!info.standby_replay || force_standby_active) { return gid; } } diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index caa33f16440..e7d9fe51b13 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -120,8 +120,6 @@ MDSDaemon::MDSDaemon(const std::string &n, Messenger *m, MonClient *mc) : log_client(m->cct, messenger, &mc->monmap, LogClient::NO_FLAGS), mds_rank(NULL), tick_event(0), - standby_for_rank(MDSMap::MDS_NO_STANDBY_PREF), - standby_replay(false), asok_hook(NULL) { orig_argc = 0; @@ -545,26 +543,7 @@ int MDSDaemon::init() timer.init(); - MDSMap::DaemonState wanted_state = MDSMap::STATE_BOOT; - if (g_conf->mds_standby_replay) { - standby_replay = true; - } - - standby_for_rank = mds_rank_t(g_conf->mds_standby_for_rank); - standby_for_name.assign(g_conf->mds_standby_for_name); - - if (standby_replay && standby_for_rank == -1) { - if (standby_for_name.empty()) - standby_for_rank = MDSMap::MDS_STANDBY_ANY; - else - standby_for_rank = MDSMap::MDS_STANDBY_NAME; - } else if (!standby_replay && !standby_for_name.empty()) { - standby_for_rank = MDSMap::MDS_MATCHED_ACTIVE; - } - - beacon.init(mdsmap, wanted_state, - standby_for_rank, standby_for_name, - fs_cluster_id_t(g_conf->mds_standby_for_fscid)); + beacon.init(mdsmap); messenger->set_myname(entity_name_t::MDS(MDS_RANK_NONE)); // schedule tick @@ -1037,11 +1016,6 @@ void MDSDaemon::_handle_mds_map(MDSMap *oldmap) beacon.set_want_state(mdsmap, new_state); dout(1) << "handle_mds_map standby" << dendl; - if (standby_replay) { - // we want to be in standby_replay - beacon.set_want_state(mdsmap, MDSMap::STATE_STANDBY_REPLAY); - beacon.send(); - } return; } diff --git a/src/mds/MDSDaemon.h b/src/mds/MDSDaemon.h index 05a056d2748..4d6296c5df2 100644 --- a/src/mds/MDSDaemon.h +++ b/src/mds/MDSDaemon.h @@ -141,10 +141,6 @@ class MDSDaemon : public Dispatcher, public md_config_obs_t { void wait_for_omap_osds(); - mds_rank_t standby_for_rank; - string standby_for_name; - bool standby_replay; - private: bool ms_dispatch(Message *m); bool ms_get_authorizer(int dest_type, AuthAuthorizer **authorizer, bool force_new); diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index f66fc7a85e4..12db94b858f 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -19,11 +19,6 @@ using std::stringstream; -const mds_rank_t MDSMap::MDS_NO_STANDBY_PREF(-1); -const mds_rank_t MDSMap::MDS_STANDBY_ANY(-2); -const mds_rank_t MDSMap::MDS_STANDBY_NAME(-3); -const mds_rank_t MDSMap::MDS_MATCHED_ACTIVE(-4); - // features CompatSet get_mdsmap_compat_set_all() { CompatSet::FeatureSet feature_compat; @@ -83,6 +78,7 @@ void MDSMap::mds_info_t::dump(Formatter *f) const f->dump_int("standby_for_rank", standby_for_rank); f->dump_int("standby_for_fscid", standby_for_fscid); f->dump_string("standby_for_name", standby_for_name); + f->dump_bool("standby_replay", standby_replay); f->open_array_section("export_targets"); for (set::iterator p = export_targets.begin(); p != export_targets.end(); ++p) { @@ -408,7 +404,7 @@ void MDSMap::get_health(list >& summary, void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const { - ENCODE_START(6, 4, bl); + ENCODE_START(7, 4, bl); ::encode(global_id, bl); ::encode(name, bl); ::encode(rank, bl); @@ -422,6 +418,7 @@ void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) con ::encode(export_targets, bl); ::encode(mds_features, bl); ::encode(standby_for_fscid, bl); + ::encode(standby_replay, bl); ENCODE_FINISH(bl); } @@ -444,7 +441,7 @@ void MDSMap::mds_info_t::encode_unversioned(bufferlist& bl) const void MDSMap::mds_info_t::decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(6, 4, 4, bl); + DECODE_START_LEGACY_COMPAT_LEN(7, 4, 4, bl); ::decode(global_id, bl); ::decode(name, bl); ::decode(rank, bl); @@ -462,6 +459,9 @@ void MDSMap::mds_info_t::decode(bufferlist::iterator& bl) if (struct_v >= 6) { ::decode(standby_for_fscid, bl); } + if (struct_v >= 7) { + ::decode(standby_replay, bl); + } DECODE_FINISH(bl); } diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index eda4fc7bce4..9a37fed768c 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -118,16 +118,6 @@ public: */ } DaemonState; - // indicate startup standby preferences for MDS - // of course, if they have a specific rank to follow, they just set that! - static const mds_rank_t MDS_NO_STANDBY_PREF; // doesn't have instructions to do anything - static const mds_rank_t MDS_STANDBY_ANY; // is instructed to be standby-replay, may - // or may not have specific name to follow - static const mds_rank_t MDS_STANDBY_NAME; // standby for a named MDS - static const mds_rank_t MDS_MATCHED_ACTIVE; // has a matched standby, which if up - // it should follow, but otherwise should - // be assigned a rank - struct mds_info_t { mds_gid_t global_id; std::string name; @@ -140,12 +130,15 @@ public: mds_rank_t standby_for_rank; std::string standby_for_name; fs_cluster_id_t standby_for_fscid; + bool standby_replay; std::set export_targets; uint64_t mds_features; - mds_info_t() : global_id(MDS_GID_NONE), rank(MDS_RANK_NONE), inc(0), state(STATE_STANDBY), state_seq(0), - standby_for_rank(MDS_NO_STANDBY_PREF), - standby_for_fscid(FS_CLUSTER_ID_NONE) + mds_info_t() : global_id(MDS_GID_NONE), rank(MDS_RANK_NONE), inc(0), + state(STATE_STANDBY), state_seq(0), + standby_for_rank(MDS_RANK_NONE), + standby_for_fscid(FS_CLUSTER_ID_NONE), + standby_replay(false) { } bool laggy() const { return !(laggy_since == utime_t()); } diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index ea1582995f2..d2b08cadd5c 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -529,6 +529,7 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) new_info.standby_for_rank = m->get_standby_for_rank(); new_info.standby_for_name = m->get_standby_for_name(); new_info.standby_for_fscid = m->get_standby_for_fscid(); + new_info.standby_replay = m->get_standby_replay(); pending_fsmap.insert(new_info); } @@ -567,6 +568,15 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) } else { // state update const MDSMap::mds_info_t &info = pending_fsmap.get_info_gid(gid); + // Old MDS daemons don't mention that they're standby replay until + // after they've sent their boot beacon, so update this field. + if (info.standby_replay != m->get_standby_replay()) { + pending_fsmap.modify_daemon(info.global_id, [&m]( + MDSMap::mds_info_t *i) + { + i->standby_replay = m->get_standby_replay(); + }); + } if (info.state == MDSMap::STATE_STOPPING && state != MDSMap::STATE_STOPPED ) { // we can't transition to any other states from STOPPING @@ -593,67 +603,6 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) if (state == MDSMap::STATE_STOPPED) { pending_fsmap.stop(gid); last_beacon.erase(gid); - } else if (state == MDSMap::STATE_STANDBY_REPLAY) { - if (m->get_standby_for_rank() == MDSMap::MDS_STANDBY_NAME) { - dout(20) << "looking for mds " << m->get_standby_for_name() - << " to STANDBY_REPLAY for" << dendl; - auto target_info = pending_fsmap.find_by_name(m->get_standby_for_name()); - if (target_info == nullptr) { - // This name is unknown, do nothing, stay in standby - return false; - } - - auto target_ns = pending_fsmap.mds_roles.at(target_info->global_id); - if (target_ns == FS_CLUSTER_ID_NONE) { - // The named daemon is not in a Filesystem, do nothing. - return false; - } - - auto target_fs = pending_fsmap.get_filesystem(target_ns); - if (target_fs->mds_map.is_followable(info.rank)) { - dout(10) <<" found mds " << m->get_standby_for_name() - << "; it has rank " << target_info->rank << dendl; - pending_fsmap.modify_daemon(info.global_id, - [target_info, target_ns, seq](MDSMap::mds_info_t *info) { - info->standby_for_rank = target_info->rank; - info->standby_for_fscid = target_ns; - info->state = MDSMap::STATE_STANDBY_REPLAY; - info->state_seq = seq; - }); - } else { - // The named daemon has a rank but isn't followable, do nothing - return false; - } - } else if (m->get_standby_for_rank() >= 0) { - fs_cluster_id_t target_ns = m->get_standby_for_fscid(); - - mds_role_t target_role = { - target_ns == FS_CLUSTER_ID_NONE ? - pending_fsmap.legacy_client_fscid : info.standby_for_fscid, - m->get_standby_for_rank()}; - - if (target_role.fscid != FS_CLUSTER_ID_NONE) { - auto fs = pending_fsmap.get_filesystem(target_role.fscid); - if (fs->mds_map.is_followable(target_role.rank)) { - pending_fsmap.modify_daemon(info.global_id, - [target_role, seq](MDSMap::mds_info_t *info) { - info->standby_for_rank = target_role.rank; - info->standby_for_fscid = target_role.fscid; - info->state = MDSMap::STATE_STANDBY_REPLAY; - info->state_seq = seq; - }); - } else { - // We know who they want to follow, but he's not in a suitable state - return false; - } - } else { - // Couldn't resolve to a particular filesystem - return false; - } - } else { //it's a standby for anybody, and is already in the list - assert(pending_fsmap.get_mds_info().count(info.global_id)); - return false; - } } else if (state == MDSMap::STATE_DAMAGED) { if (!mon->osdmon()->is_writeable()) { dout(4) << __func__ << ": DAMAGED from rank " << info.rank @@ -2722,8 +2671,8 @@ bool MDSMonitor::maybe_expand_cluster(std::shared_ptr fs) while (fs->mds_map.is_in(mds)) { mds++; } - mds_gid_t newgid = pending_fsmap.find_replacement_for({fs->fscid, mds}, name, - g_conf->mon_force_standby_active); + mds_gid_t newgid = pending_fsmap.find_replacement_for({fs->fscid, mds}, + name, g_conf->mon_force_standby_active); if (newgid == MDS_GID_NONE) { break; } @@ -2766,7 +2715,8 @@ void MDSMonitor::maybe_replace_gid(mds_gid_t gid, info.state != MDSMap::STATE_STANDBY_REPLAY && !pending_fsmap.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_DOWN) && (sgid = pending_fsmap.find_replacement_for({fscid, info.rank}, info.name, - g_conf->mon_force_standby_active)) != MDS_GID_NONE) { + g_conf->mon_force_standby_active)) != MDS_GID_NONE) + { MDSMap::mds_info_t si = pending_fsmap.get_info_gid(sgid); dout(10) << " replacing " << gid << " " << info.addr << " mds." @@ -2846,6 +2796,10 @@ bool MDSMonitor::maybe_promote_standby(std::shared_ptr fs) const auto &info = j.second; assert(info.state == MDSMap::STATE_STANDBY); + if (!info.standby_replay) { + continue; + } + /* * This mds is standby but has no rank assigned. * See if we can find it somebody to shadow @@ -2886,8 +2840,7 @@ bool MDSMonitor::maybe_promote_standby(std::shared_ptr fs) continue; // we're supposed to follow someone else } - if (info.standby_for_rank == MDSMap::MDS_STANDBY_ANY && - try_standby_replay(info, *(fs_i.second), cand_info)) { + if (try_standby_replay(info, *(fs_i.second), cand_info)) { do_propose = true; break; }