From: John Spray Date: Wed, 17 Feb 2016 16:03:27 +0000 (+0000) Subject: mds: add FSMap X-Git-Tag: v10.1.0~144^2~7 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=00022bf9f59fdd438513ffe29265599e79fe3d80;p=ceph.git mds: add FSMap FSMap encapsulates a collection of MDSMaps, one for each filesystem, and a collection of standby MDS daemons that haven't been assigned to a filesystem yet. Signed-off-by: John Spray --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 589d271f84cb..d2b56e66f081 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -173,6 +173,7 @@ set(auth_files set(mds_files) list(APPEND mds_files mds/MDSMap.cc + mds/FSMap.cc mds/inode_backtrace.cc mds/mdstypes.cc) diff --git a/src/common/Makefile.am b/src/common/Makefile.am index 538788ed64dd..79c6a9ab7ee6 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -125,6 +125,7 @@ libcommon_internal_la_SOURCES += \ osd/ECMsgTypes.cc \ osd/HitSet.cc \ mds/MDSMap.cc \ + mds/FSMap.cc \ mds/inode_backtrace.cc \ mds/mdstypes.cc \ mds/flock.cc diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc new file mode 100644 index 000000000000..ac067f3df4eb --- /dev/null +++ b/src/mds/FSMap.cc @@ -0,0 +1,762 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#include "FSMap.h" + +#include +using std::stringstream; + + +void Filesystem::dump(Formatter *f) const +{ + f->open_object_section("mdsmap"); + mds_map.dump(f); + f->close_section(); + f->dump_int("id", fscid); +} + +void FSMap::dump(Formatter *f) const +{ + f->dump_int("epoch", epoch); + + f->open_object_section("compat"); + compat.dump(f); + f->close_section(); + + f->open_array_section("standbys"); + for (const auto &i : standby_daemons) { + f->open_object_section("info"); + i.second.dump(f); + f->dump_int("epoch", standby_epochs.at(i.first)); + f->close_section(); + } + f->close_section(); + + f->open_array_section("filesystems"); + for (const auto fs : filesystems) { + f->open_object_section("filesystem"); + fs.second->dump(f); + f->close_section(); + } + f->close_section(); +} + +void FSMap::generate_test_instances(list& ls) +{ + FSMap *m = new FSMap(); + + std::list mds_map_instances; + MDSMap::generate_test_instances(mds_map_instances); + + int k = 20; + for (auto i : mds_map_instances) { + auto fs = std::make_shared(); + fs->fscid = k++; + fs->mds_map = *i; + delete i; + m->filesystems[fs->fscid] = fs; + } + mds_map_instances.clear(); + + ls.push_back(m); +} + +void FSMap::print(ostream& out) const +{ + // TODO add a non-json print? + JSONFormatter f(true); + f.open_object_section("fsmap"); + dump(&f); + f.close_section(); + f.flush(out); +} + + + +void FSMap::print_summary(Formatter *f, ostream *out) +{ + map by_rank; + map by_state; + + if (f) { + f->dump_unsigned("epoch", get_epoch()); + for (auto i : filesystems) { + auto fs = i.second; + f->dump_unsigned("id", fs->fscid); + f->dump_unsigned("up", fs->mds_map.up.size()); + f->dump_unsigned("in", fs->mds_map.in.size()); + f->dump_unsigned("max", fs->mds_map.max_mds); + } + } else { + *out << "e" << get_epoch() << ":"; + if (filesystems.size() == 1) { + auto fs = filesystems.begin()->second; + *out << " " << fs->mds_map.up.size() << "/" << fs->mds_map.in.size() << "/" + << fs->mds_map.max_mds << " up"; + } else { + for (auto i : filesystems) { + auto fs = i.second; + *out << " " << fs->mds_map.fs_name << "-" << fs->mds_map.up.size() << "/" + << fs->mds_map.in.size() << "/" << fs->mds_map.max_mds << " up"; + } + } + } + + if (f) { + f->open_array_section("by_rank"); + } + + const auto all_info = get_mds_info(); + for (const auto &p : all_info) { + const auto &info = p.second; + string s = ceph_mds_state_name(info.state); + if (info.laggy()) { + s += "(laggy or crashed)"; + } + + const fs_cluster_id_t fscid = mds_roles.at(info.global_id); + + if (info.rank != MDS_RANK_NONE) { + if (f) { + f->open_object_section("mds"); + f->dump_unsigned("filesystem_id", fscid); + f->dump_unsigned("rank", info.rank); + f->dump_string("name", info.name); + f->dump_string("status", s); + f->close_section(); + } else { + by_rank[mds_role_t(fscid, info.rank)] = info.name + "=" + s; + } + } else { + by_state[s]++; + } + } + + if (f) { + f->close_section(); + } else { + if (!by_rank.empty()) { + if (filesystems.size() > 1) { + // Disambiguate filesystems + std::map pretty; + for (auto i : by_rank) { + const auto &fs_name = filesystems.at(i.first.fscid)->mds_map.fs_name; + std::ostringstream o; + o << "[" << fs_name << ":" << i.first.rank << "]"; + pretty[o.str()] = i.second; + } + *out << " " << pretty; + } else { + *out << " " << by_rank; + } + } + } + + for (map::reverse_iterator p = by_state.rbegin(); p != by_state.rend(); ++p) { + if (f) { + f->dump_unsigned(p->first.c_str(), p->second); + } else { + *out << ", " << p->second << " " << p->first; + } + } + + size_t failed = 0; + size_t damaged = 0; + for (auto i : filesystems) { + auto fs = i.second; + failed += fs->mds_map.failed.size(); + damaged += fs->mds_map.damaged.size(); + } + + if (failed > 0) { + if (f) { + f->dump_unsigned("failed", failed); + } else { + *out << ", " << failed << " failed"; + } + } + + if (damaged > 0) { + if (f) { + f->dump_unsigned("damaged", damaged); + } else { + *out << ", " << damaged << " damaged"; + } + } + //if (stopped.size()) + //out << ", " << stopped.size() << " stopped"; +} + +void FSMap::get_health(list >& summary, + list > *detail) const +{ + for (auto i : filesystems) { + auto fs = i.second; + + // TODO: move get_health up into here so that we can qualify + // all the messages with what filesystem they're talking about + fs->mds_map.get_health(summary, detail); + } +} + +void FSMap::encode(bufferlist& bl, uint64_t features) const +{ + ENCODE_START(10, 10, bl); + ::encode(epoch, bl); + ::encode(next_filesystem_id, bl); + ::encode(legacy_client_fscid, bl); + ::encode(compat, bl); + ::encode(enable_multiple, bl); + std::vector fs_list; + for (auto i : filesystems) { + fs_list.push_back(*(i.second)); + } + ::encode(fs_list, bl); + ::encode(mds_roles, bl); + ::encode(standby_daemons, bl, features); + ::encode(standby_epochs, bl); + ENCODE_FINISH(bl); +} + +void FSMap::decode(bufferlist::iterator& p) +{ + // Because the mon used to store an MDSMap where we now + // store an FSMap, FSMap knows how to decode the legacy + // MDSMap format (it never needs to encode it though). + Filesystem legacy_fs; + MDSMap &legacy_mds_map = legacy_fs.mds_map; + bool enabled = false; + + DECODE_START_LEGACY_COMPAT_LEN_16(10, 4, 4, p); + if (struct_v < 10) { + // Decoding an MDSMap (upgrade) + ::decode(epoch, p); + ::decode(legacy_mds_map.flags, p); + ::decode(legacy_mds_map.last_failure, p); + ::decode(legacy_mds_map.root, p); + ::decode(legacy_mds_map.session_timeout, p); + ::decode(legacy_mds_map.session_autoclose, p); + ::decode(legacy_mds_map.max_file_size, p); + ::decode(legacy_mds_map.max_mds, p); + ::decode(legacy_mds_map.mds_info, p); + if (struct_v < 3) { + __u32 n; + ::decode(n, p); + while (n--) { + __u32 m; + ::decode(m, p); + legacy_mds_map.data_pools.insert(m); + } + __s32 s; + ::decode(s, p); + legacy_mds_map.cas_pool = s; + } else { + ::decode(legacy_mds_map.data_pools, p); + ::decode(legacy_mds_map.cas_pool, p); + } + + // kclient ignores everything from here + __u16 ev = 1; + if (struct_v >= 2) + ::decode(ev, p); + if (ev >= 3) + ::decode(legacy_mds_map.compat, p); + else + legacy_mds_map.compat = get_mdsmap_compat_set_base(); + if (ev < 5) { + __u32 n; + ::decode(n, p); + legacy_mds_map.metadata_pool = n; + } else { + ::decode(legacy_mds_map.metadata_pool, p); + } + ::decode(legacy_mds_map.tableserver, p); + ::decode(legacy_mds_map.in, p); + ::decode(legacy_mds_map.inc, p); + ::decode(legacy_mds_map.up, p); + ::decode(legacy_mds_map.failed, p); + ::decode(legacy_mds_map.stopped, p); + if (ev >= 4) + ::decode(legacy_mds_map.last_failure_osd_epoch, p); + if (ev >= 6) { + ::decode(legacy_mds_map.ever_allowed_snaps, p); + ::decode(legacy_mds_map.explicitly_allowed_snaps, p); + } else { + legacy_mds_map.ever_allowed_snaps = true; + legacy_mds_map.explicitly_allowed_snaps = false; + } + if (ev >= 7) + ::decode(legacy_mds_map.inline_data_enabled, p); + + if (ev >= 8) { + assert(struct_v >= 5); + ::decode(enabled, p); + ::decode(legacy_mds_map.fs_name, p); + } else { + if (epoch > 1) { + // If an MDS has ever been started, epoch will be greater than 1, + // assume filesystem is enabled. + enabled = true; + } else { + // Upgrading from a cluster that never used an MDS, switch off + // filesystem until it's explicitly enabled. + enabled = false; + } + } + + if (ev >= 9) { + ::decode(legacy_mds_map.damaged, p); + } + // We're upgrading, populate fs_list from the legacy fields + assert(filesystems.empty()); + auto migrate_fs = std::make_shared(); + + *migrate_fs = legacy_fs; + migrate_fs->fscid = FS_CLUSTER_ID_ANONYMOUS; + migrate_fs->mds_map.fs_name = "default"; + legacy_client_fscid = migrate_fs->fscid; + compat = migrate_fs->mds_map.compat; + enable_multiple = false; + } else { + ::decode(epoch, p); + ::decode(next_filesystem_id, p); + ::decode(legacy_client_fscid, p); + ::decode(compat, p); + ::decode(enable_multiple, p); + std::vector fs_list; + ::decode(fs_list, p); + filesystems.clear(); + for (std::vector::const_iterator fs = fs_list.begin(); fs != fs_list.end(); ++fs) { + filesystems[fs->fscid] = std::make_shared(*fs); + } + + ::decode(mds_roles, p); + ::decode(standby_daemons, p); + ::decode(standby_epochs, p); + } + + DECODE_FINISH(p); +} + + +void Filesystem::encode(bufferlist& bl) const +{ + ENCODE_START(1, 1, bl); + ::encode(fscid, bl); + bufferlist mdsmap_bl; + mds_map.encode(mdsmap_bl, CEPH_FEATURE_PGID64 | CEPH_FEATURE_MDSENC); + ::encode(mdsmap_bl, bl); + ENCODE_FINISH(bl); +} + +void Filesystem::decode(bufferlist::iterator& p) +{ + DECODE_START(1, p); + ::decode(fscid, p); + bufferlist mdsmap_bl; + ::decode(mdsmap_bl, p); + bufferlist::iterator mdsmap_bl_iter = mdsmap_bl.begin(); + mds_map.decode(mdsmap_bl_iter); + DECODE_FINISH(p); +} + +int FSMap::parse_filesystem( + std::string const &ns_str, + std::shared_ptr *result + ) const +{ + std::string ns_err; + fs_cluster_id_t fscid = strict_strtol(ns_str.c_str(), 10, &ns_err); + if (!ns_err.empty() || filesystems.count(fscid) == 0) { + for (auto fs : filesystems) { + if (fs.second->mds_map.fs_name == ns_str) { + *result = fs.second; + return 0; + } + } + return -ENOENT; + } else { + *result = get_filesystem(fscid); + return 0; + } +} + +void Filesystem::print(std::ostream &out) const +{ + // TODO add a non-json print? + JSONFormatter f; + dump(&f); + f.flush(out); +} + +mds_gid_t FSMap::find_standby_for(mds_role_t role, const std::string& name) const +{ + mds_gid_t result = MDS_GID_NONE; + + // First see if we have a STANDBY_REPLAY + auto fs = get_filesystem(role.fscid); + for (const auto &i : fs->mds_map.mds_info) { + const auto &info = i.second; + if (info.rank == role.rank && info.state == MDSMap::STATE_STANDBY_REPLAY) { + return info.global_id; + } + } + + // See if there are any STANDBY daemons available + for (const auto &i : standby_daemons) { + const auto &gid = i.first; + const auto &info = i.second; + assert(info.state == MDSMap::STATE_STANDBY); + assert(info.rank == MDS_RANK_NONE); + + if (info.laggy()) { + continue; + } + + if ((info.standby_for_rank == role.rank && info.standby_for_ns == role.fscid) + || (name.length() && info.standby_for_name == name)) { + // It's a named standby for *me*, use it. + return gid; + } else if (info.standby_for_rank < 0 && info.standby_for_name.length() == 0) + // It's not a named standby for anyone, use it if we don't find + // a named standby for me later. + result = gid; + } + + return result; +} + +mds_gid_t FSMap::find_unused(bool force_standby_active) const { + for (const auto &i : standby_daemons) { + const auto &gid = i.first; + const auto &info = i.second; + assert(info.state == MDSMap::STATE_STANDBY); + + if (info.laggy() || info.rank >= 0) + continue; + + if ((info.standby_for_rank == MDSMap::MDS_NO_STANDBY_PREF || + info.standby_for_rank == MDSMap::MDS_MATCHED_ACTIVE || + (info.standby_for_rank == MDSMap::MDS_STANDBY_ANY + && force_standby_active))) { + return gid; + } + } + return MDS_GID_NONE; +} + +mds_gid_t FSMap::find_replacement_for(mds_role_t role, const std::string& name, + bool force_standby_active) const { + const mds_gid_t standby = find_standby_for(role, name); + if (standby) + return standby; + else + return find_unused(force_standby_active); +} + +void FSMap::sanity() const +{ + if (legacy_client_fscid != FS_CLUSTER_ID_NONE) { + assert(filesystems.count(legacy_client_fscid) == 1); + } + + for (const auto &i : filesystems) { + auto fs = i.second; + assert(fs->mds_map.compat.compare(compat) == 0); + assert(fs->fscid == i.first); + for (const auto &j : fs->mds_map.mds_info) { + assert(j.second.rank != MDS_RANK_NONE); + assert(mds_roles.count(j.first) == 1); + assert(standby_daemons.count(j.first) == 0); + assert(standby_epochs.count(j.first) == 0); + assert(mds_roles.at(j.first) == i.first); + if (j.second.state != MDSMap::STATE_STANDBY_REPLAY) { + assert(fs->mds_map.up.at(j.second.rank) == j.first); + assert(fs->mds_map.failed.count(j.second.rank) == 0); + assert(fs->mds_map.damaged.count(j.second.rank) == 0); + } + } + + for (const auto &j : fs->mds_map.up) { + mds_rank_t rank = j.first; + assert(fs->mds_map.in.count(rank) == 1); + mds_gid_t gid = j.second; + assert(fs->mds_map.mds_info.count(gid) == 1); + } + } + + for (const auto &i : standby_daemons) { + assert(i.second.state == MDSMap::STATE_STANDBY); + assert(i.second.rank == MDS_RANK_NONE); + assert(i.second.global_id == i.first); + assert(standby_epochs.count(i.first) == 1); + assert(mds_roles.count(i.first) == 1); + assert(mds_roles.at(i.first) == FS_CLUSTER_ID_NONE); + } + + for (const auto &i : standby_epochs) { + assert(standby_daemons.count(i.first) == 1); + } + + for (const auto &i : mds_roles) { + if (i.second == FS_CLUSTER_ID_NONE) { + assert(standby_daemons.count(i.first) == 1); + } else { + assert(filesystems.count(i.second) == 1); + assert(filesystems.at(i.second)->mds_map.mds_info.count(i.first) == 1); + } + } +} + +void FSMap::promote( + mds_gid_t standby_gid, + std::shared_ptr filesystem, + mds_rank_t assigned_rank) +{ + assert(gid_exists(standby_gid)); + bool is_standby_replay = mds_roles.at(standby_gid) != FS_CLUSTER_ID_NONE; + if (!is_standby_replay) { + assert(standby_daemons.count(standby_gid)); + assert(standby_daemons.at(standby_gid).state == MDSMap::STATE_STANDBY); + } + + MDSMap &mds_map = filesystem->mds_map; + + // Insert daemon state to Filesystem + if (!is_standby_replay) { + mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid); + } else { + assert(mds_map.mds_info.count(standby_gid)); + assert(mds_map.mds_info.at(standby_gid).state == MDSMap::STATE_STANDBY_REPLAY); + assert(mds_map.mds_info.at(standby_gid).rank == assigned_rank); + } + MDSMap::mds_info_t &info = mds_map.mds_info[standby_gid]; + + if (mds_map.stopped.count(assigned_rank)) { + // The cluster is being expanded with a stopped rank + info.state = MDSMap::STATE_STARTING; + mds_map.stopped.erase(assigned_rank); + } else if (!mds_map.is_in(assigned_rank)) { + // The cluster is being expanded with a new rank + info.state = MDSMap::STATE_CREATING; + } else { + // An existing rank is being assigned to a replacement + info.state = MDSMap::STATE_REPLAY; + mds_map.failed.erase(assigned_rank); + } + info.rank = assigned_rank; + info.inc = ++mds_map.inc[assigned_rank]; + mds_roles[standby_gid] = filesystem->fscid; + + // Update the rank state in Filesystem + mds_map.in.insert(assigned_rank); + mds_map.up[assigned_rank] = standby_gid; + + // Remove from the list of standbys + if (!is_standby_replay) { + standby_daemons.erase(standby_gid); + standby_epochs.erase(standby_gid); + } + + // Indicate that Filesystem has been modified + mds_map.epoch = epoch; +} + +void FSMap::assign_standby_replay( + const mds_gid_t standby_gid, + const fs_cluster_id_t leader_ns, + const mds_rank_t leader_rank) +{ + assert(mds_roles.at(standby_gid) == FS_CLUSTER_ID_NONE); + assert(gid_exists(standby_gid)); + assert(!gid_has_rank(standby_gid)); + assert(standby_daemons.count(standby_gid)); + + // Insert to the filesystem + auto fs = filesystems.at(leader_ns); + fs->mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid); + fs->mds_map.mds_info[standby_gid].rank = leader_rank; + fs->mds_map.mds_info[standby_gid].state = MDSMap::STATE_STANDBY_REPLAY; + mds_roles[standby_gid] = leader_ns; + + // Remove from the list of standbys + standby_daemons.erase(standby_gid); + standby_epochs.erase(standby_gid); + + // Indicate that Filesystem has been modified + fs->mds_map.epoch = epoch; +} + +void FSMap::erase(mds_gid_t who, epoch_t blacklist_epoch) +{ + if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) { + standby_daemons.erase(who); + standby_epochs.erase(who); + } else { + auto fs = filesystems.at(mds_roles.at(who)); + const auto &info = fs->mds_map.mds_info.at(who); + if (info.state != MDSMap::STATE_STANDBY_REPLAY) { + if (info.state == MDSMap::STATE_CREATING) { + // If this gid didn't make it past CREATING, then forget + // the rank ever existed so that next time it's handed out + // to a gid it'll go back into CREATING. + fs->mds_map.in.erase(info.rank); + } else { + // Put this rank into the failed list so that the next available + // STANDBY will pick it up. + fs->mds_map.failed.insert(info.rank); + } + assert(fs->mds_map.up.at(info.rank) == info.global_id); + fs->mds_map.up.erase(info.rank); + } + fs->mds_map.mds_info.erase(who); + fs->mds_map.last_failure_osd_epoch = blacklist_epoch; + fs->mds_map.epoch = epoch; + } + + mds_roles.erase(who); +} + +void FSMap::damaged(mds_gid_t who, epoch_t blacklist_epoch) +{ + assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE); + auto fs = filesystems.at(mds_roles.at(who)); + mds_rank_t rank = fs->mds_map.mds_info[who].rank; + + erase(who, blacklist_epoch); + fs->mds_map.failed.erase(rank); + fs->mds_map.damaged.insert(rank); + + assert(fs->mds_map.epoch == epoch); +} + +/** + * Update to indicate that the rank `rank` is to be removed + * from the damaged list of the filesystem `fscid` + */ +bool FSMap::undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank) +{ + auto fs = filesystems.at(fscid); + + if (fs->mds_map.damaged.count(rank)) { + fs->mds_map.damaged.erase(rank); + fs->mds_map.failed.insert(rank); + fs->mds_map.epoch = epoch; + return true; + } else { + return false; + } +} + +void FSMap::insert(const MDSMap::mds_info_t &new_info) +{ + mds_roles[new_info.global_id] = FS_CLUSTER_ID_NONE; + standby_daemons[new_info.global_id] = new_info; + standby_epochs[new_info.global_id] = epoch; +} + +void FSMap::stop(mds_gid_t who) +{ + assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE); + auto fs = filesystems.at(mds_roles.at(who)); + const auto &info = fs->mds_map.mds_info.at(who); + fs->mds_map.up.erase(info.rank); + fs->mds_map.in.erase(info.rank); + fs->mds_map.stopped.insert(info.rank); + + fs->mds_map.mds_info.erase(who); + mds_roles.erase(who); + + fs->mds_map.epoch = epoch; +} + + +/** + * Given one of the following forms: + * : + * : + * + * + * Parse into a mds_role_t. The rank-only form is only valid + * if legacy_client_ns is set. + */ +int FSMap::parse_role( + const std::string &role_str, + mds_role_t *role, + std::ostream &ss) const +{ + auto colon_pos = role_str.find(":"); + + if (colon_pos != std::string::npos && colon_pos != role_str.size()) { + auto fs_part = role_str.substr(0, colon_pos); + auto rank_part = role_str.substr(colon_pos + 1); + + std::string err; + fs_cluster_id_t fs_id = FS_CLUSTER_ID_NONE; + long fs_id_i = strict_strtol(fs_part.c_str(), 10, &err); + if (fs_id_i < 0 || !err.empty()) { + // Try resolving as name + auto fs = get_filesystem(fs_part); + if (fs == nullptr) { + ss << "Unknown filesystem name '" << fs_part << "'"; + return -EINVAL; + } else { + fs_id = fs->fscid; + } + } else { + fs_id = fs_id_i; + } + + mds_rank_t rank; + long rank_i = strict_strtol(rank_part.c_str(), 10, &err); + if (rank_i < 0 || !err.empty()) { + ss << "Invalid rank '" << rank_part << "'"; + return -EINVAL; + } else { + rank = rank_i; + } + + *role = {fs_id, rank}; + } else { + std::string err; + long who_i = strict_strtol(role_str.c_str(), 10, &err); + if (who_i < 0 || !err.empty()) { + ss << "Invalid rank '" << role_str << "'"; + return -EINVAL; + } + + if (legacy_client_fscid == FS_CLUSTER_ID_NONE) { + ss << "No filesystem selected"; + return -ENOENT; + } else { + *role = mds_role_t(legacy_client_fscid, who_i); + } + } + + // Now check that the role actually exists + if (get_filesystem(role->fscid) == nullptr) { + ss << "Filesystem with ID '" << role->fscid << "' not found"; + return -ENOENT; + } + + auto fs = get_filesystem(role->fscid); + if (fs->mds_map.in.count(role->rank) == 0) { + ss << "Rank '" << role->rank << "' not found"; + return -ENOENT; + } + + return 0; +} + diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h new file mode 100644 index 000000000000..f9320f049753 --- /dev/null +++ b/src/mds/FSMap.h @@ -0,0 +1,459 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#ifndef CEPH_FSMAP_H +#define CEPH_FSMAP_H + +#include + +#include "include/types.h" +#include "common/Clock.h" +#include "msg/Message.h" +#include "mds/MDSMap.h" + +#include +#include +#include + +#include "common/config.h" + +#include "include/CompatSet.h" +#include "include/ceph_features.h" +#include "common/Formatter.h" +#include "mds/mdstypes.h" + +class CephContext; + +#define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20") +#define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges") +#define MDS_FEATURE_INCOMPAT_FILELAYOUT CompatSet::Feature(3, "default file layouts on dirs") +#define MDS_FEATURE_INCOMPAT_DIRINODE CompatSet::Feature(4, "dir inode in separate object") +#define MDS_FEATURE_INCOMPAT_ENCODING CompatSet::Feature(5, "mds uses versioned encoding") +#define MDS_FEATURE_INCOMPAT_OMAPDIRFRAG CompatSet::Feature(6, "dirfrag is stored in omap") +#define MDS_FEATURE_INCOMPAT_INLINE CompatSet::Feature(7, "mds uses inline data") +#define MDS_FEATURE_INCOMPAT_NOANCHOR CompatSet::Feature(8, "no anchor table") + +#define MDS_FS_NAME_DEFAULT "cephfs" + +/** + * The MDSMap and any additional fields describing a particular + * filesystem (a unique fs_cluster_id_t). + */ +class Filesystem +{ + public: + fs_cluster_id_t fscid; + MDSMap mds_map; + + void encode(bufferlist& bl) const; + void decode(bufferlist::iterator& p); + + Filesystem() + : + fscid(FS_CLUSTER_ID_NONE) + { + } + + void dump(Formatter *f) const; + void print(std::ostream& out) const; + + /** + * Return true if a daemon is already assigned as + * STANDBY_REPLAY for the gid `who` + */ + bool has_standby_replay(mds_gid_t who) const + { + for (const auto &i : mds_map.mds_info) { + const auto &info = i.second; + if (info.state == MDSMap::STATE_STANDBY_REPLAY + && info.rank == mds_map.mds_info.at(who).rank) { + return true; + } + } + + return false; + } +}; +WRITE_CLASS_ENCODER(Filesystem) + +class FSMap { +protected: + epoch_t epoch; + uint64_t next_filesystem_id; + fs_cluster_id_t legacy_client_fscid; + CompatSet compat; + bool enable_multiple; + + std::map > filesystems; + + // Remember which Filesystem an MDS daemon's info is stored in + // (or in standby_daemons for FS_CLUSTER_ID_NONE) + std::map mds_roles; + + // For MDS daemons not yet assigned to a Filesystem + std::map standby_daemons; + std::map standby_epochs; + +public: + + friend class MDSMonitor; + + FSMap() + : epoch(0), + next_filesystem_id(FS_CLUSTER_ID_ANONYMOUS + 1), + legacy_client_fscid(FS_CLUSTER_ID_NONE), + compat(get_mdsmap_compat_set_default()), + enable_multiple(false) + { } + + FSMap(const FSMap &rhs) + : + epoch(rhs.epoch), + next_filesystem_id(rhs.next_filesystem_id), + legacy_client_fscid(rhs.legacy_client_fscid), + compat(rhs.compat), + enable_multiple(rhs.enable_multiple), + mds_roles(rhs.mds_roles), + standby_daemons(rhs.standby_daemons), + standby_epochs(rhs.standby_epochs) + { + for (auto &i : rhs.filesystems) { + auto fs = i.second; + filesystems[fs->fscid] = std::make_shared(*fs); + } + } + + FSMap &operator=(const FSMap &rhs) + { + epoch = rhs.epoch; + next_filesystem_id = rhs.next_filesystem_id; + legacy_client_fscid = rhs.legacy_client_fscid; + compat = rhs.compat; + enable_multiple = rhs.enable_multiple; + mds_roles = rhs.mds_roles; + standby_daemons = rhs.standby_daemons; + standby_epochs = rhs.standby_epochs; + + for (auto &i : rhs.filesystems) { + auto fs = i.second; + filesystems[fs->fscid] = std::make_shared(*fs); + } + + return *this; + } + + const CompatSet &get_compat() const {return compat;} + + void set_enable_multiple(const bool v) + { + enable_multiple = v; + } + + bool get_enable_multiple() const + { + return enable_multiple; + } + + /** + * Get state of all daemons (for all filesystems, including all standbys) + */ + std::map get_mds_info() const + { + std::map result; + for (const auto &i : standby_daemons) { + result[i.first] = i.second; + } + + for (const auto &i : filesystems) { + auto fs_info = i.second->mds_map.get_mds_info(); + for (auto j : fs_info) { + result[j.first] = j.second; + } + } + + return result; + } + + /** + * Resolve daemon name to GID + */ + mds_gid_t find_mds_gid_by_name(const std::string& s) const + { + const auto info = get_mds_info(); + for (const auto &p : info) { + if (p.second.name == s) { + return p.first; + } + } + return MDS_GID_NONE; + } + + /** + * Resolve daemon name to status + */ + const MDSMap::mds_info_t* find_by_name(const std::string& name) const + { + std::map result; + for (const auto &i : standby_daemons) { + if (i.second.name == name) { + return &(i.second); + } + } + + for (const auto &i : filesystems) { + const auto &fs_info = i.second->mds_map.get_mds_info(); + for (const auto &j : fs_info) { + if (j.second.name == name) { + return &(j.second); + } + } + } + + return nullptr; + } + + /** + * Does a daemon exist with this GID? + */ + bool gid_exists(mds_gid_t gid) const + { + return mds_roles.count(gid) > 0; + } + + /** + * Does a daemon with this GID exist, *and* have an MDS rank assigned? + */ + bool gid_has_rank(mds_gid_t gid) const + { + return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE; + } + + /** + * Insert a new MDS daemon, as a standby + */ + void insert(const MDSMap::mds_info_t &new_info); + + /** + * Assign an MDS cluster standby replay rank to a standby daemon + */ + void assign_standby_replay( + const mds_gid_t standby_gid, + const fs_cluster_id_t leader_ns, + const mds_rank_t leader_rank); + + /** + * Assign an MDS cluster rank to a standby daemon + */ + void promote( + mds_gid_t standby_gid, + std::shared_ptr filesystem, + mds_rank_t assigned_rank); + + /** + * A daemon reports that it is STATE_STOPPED: remove it, + * and the rank it held. + */ + void stop(mds_gid_t who); + + /** + * The rank held by 'who', if any, is to be relinquished, and + * the state for the daemon GID is to be forgotten. + */ + void erase(mds_gid_t who, epoch_t blacklist_epoch); + + /** + * Update to indicate that the rank held by 'who' is damaged + */ + void damaged(mds_gid_t who, epoch_t blacklist_epoch); + + /** + * Update to indicate that the rank `rank` is to be removed + * from the damaged list of the filesystem `fscid` + */ + bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank); + + /** + * Mutator helper for Filesystem objects: expose a non-const + * Filesystem pointer to `fn` and update epochs appropriately. + */ + void modify_filesystem( + const fs_cluster_id_t fscid, + std::function )> fn) + { + auto fs = filesystems.at(fscid); + fn(fs); + fs->mds_map.epoch = epoch; + } + + /** + * Apply a mutation to the mds_info_t structure for a particular + * daemon (identified by GID), and make appropriate updates to epochs. + */ + void modify_daemon( + mds_gid_t who, + std::function fn) + { + if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) { + fn(&standby_daemons.at(who)); + standby_epochs[who] = epoch; + } else { + auto fs = filesystems[mds_roles.at(who)]; + auto &info = fs->mds_map.mds_info.at(who); + fn(&info); + + fs->mds_map.epoch = epoch; + } + } + + /** + * Given that gid exists in a filesystem or as a standby, return + * a reference to its info. + */ + const MDSMap::mds_info_t& get_info_gid(mds_gid_t gid) const + { + auto fscid = mds_roles.at(gid); + if (fscid == FS_CLUSTER_ID_NONE) { + return standby_daemons.at(gid); + } else { + return filesystems.at(fscid)->mds_map.mds_info.at(gid); + } + } + + /** + * A daemon has told us it's compat, and it's too new + * for the one we had previously. Impose the new one + * on all filesystems. + */ + void update_compat(CompatSet c) + { + // We could do something more complicated here to enable + // different filesystems to be served by different MDS versions, + // but this is a lot simpler because it doesn't require us to + // track the compat versions for standby daemons. + compat = c; + for (auto i : filesystems) { + MDSMap &mds_map = i.second->mds_map; + mds_map.compat = c; + mds_map.epoch = epoch; + } + } + + std::shared_ptr get_legacy_filesystem() + { + if (legacy_client_fscid == FS_CLUSTER_ID_NONE) { + return nullptr; + } else { + return filesystems.at(legacy_client_fscid); + } + } + + /** + * A daemon has informed us of its offload targets + */ + void update_export_targets(mds_gid_t who, const std::set targets) + { + auto fscid = mds_roles.at(who); + modify_filesystem(fscid, [who, &targets](std::shared_ptr fs) { + fs->mds_map.mds_info.at(who).export_targets = targets; + }); + } + + const std::map > &get_filesystems() const + { + return filesystems; + } + bool any_filesystems() const {return !filesystems.empty(); } + bool filesystem_exists(fs_cluster_id_t fscid) const + {return filesystems.count(fscid) > 0;} + + epoch_t get_epoch() const { return epoch; } + void inc_epoch() { epoch++; } + + std::shared_ptr get_filesystem(fs_cluster_id_t fscid) const + { + return filesystems.at(fscid); + } + + int parse_filesystem( + std::string const &ns_str, + std::shared_ptr *result + ) const; + + int parse_role( + const std::string &role_str, + mds_role_t *role, + std::ostream &ss) const; + + /** + * Return true if this pool is in use by any of the filesystems + */ + bool pool_in_use(int64_t poolid) const { + for (auto const &i : filesystems) { + if (i.second->mds_map.is_data_pool(poolid) + || i.second->mds_map.metadata_pool == poolid) { + return true; + } + } + return false; + } + + mds_gid_t find_standby_for(mds_role_t mds, const std::string& name) const; + + mds_gid_t find_unused(bool force_standby_active) const; + + mds_gid_t find_replacement_for(mds_role_t mds, const std::string& name, + bool force_standby_active) const; + + void get_health(list >& summary, + list > *detail) const; + + std::shared_ptr get_filesystem(const std::string &name) const + { + for (auto &i : filesystems) { + if (i.second->mds_map.fs_name == name) { + return i.second; + } + } + + return nullptr; + } + + /** + * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are + * all self-consistent. + */ + void sanity() const; + + void encode(bufferlist& bl, uint64_t features) const; + void decode(bufferlist::iterator& p); + void decode(bufferlist& bl) { + bufferlist::iterator p = bl.begin(); + decode(p); + } + + void print(ostream& out) const; + void print_summary(Formatter *f, ostream *out); + + void dump(Formatter *f) const; + static void generate_test_instances(list& ls); +}; +WRITE_CLASS_ENCODER_FEATURES(FSMap) + +inline ostream& operator<<(ostream& out, FSMap& m) { + m.print_summary(NULL, &out); + return out; +} + +#endif diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index aa7949f2bbc1..e9b4e8559cb6 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -81,6 +81,7 @@ void MDSMap::mds_info_t::dump(Formatter *f) const f->dump_stream("laggy_since") << laggy_since; f->dump_int("standby_for_rank", standby_for_rank); + f->dump_int("standby_for_ns", standby_for_ns); f->dump_string("standby_for_name", standby_for_name); f->open_array_section("export_targets"); for (set::iterator p = export_targets.begin(); @@ -176,8 +177,9 @@ void MDSMap::generate_test_instances(list& ls) ls.push_back(m); } -void MDSMap::print(ostream& out) +void MDSMap::print(ostream& out) const { + out << "fs_name\t" << fs_name << "\n"; out << "epoch\t" << epoch << "\n"; out << "flags\t" << hex << flags << dec << "\n"; out << "created\t" << created << "\n"; @@ -201,17 +203,15 @@ void MDSMap::print(ostream& out) out << "inline_data\t" << (inline_data_enabled ? "enabled" : "disabled") << "\n"; multimap< pair, mds_gid_t > foo; - for (map::iterator p = mds_info.begin(); - p != mds_info.end(); - ++p) - foo.insert(std::make_pair(std::make_pair(p->second.rank, p->second.inc-1), p->first)); - - for (multimap< pair, mds_gid_t >::iterator p = foo.begin(); - p != foo.end(); - ++p) { - mds_info_t& info = mds_info[p->second]; + for (const auto &p : mds_info) { + foo.insert(std::make_pair( + std::make_pair(p.second.rank, p.second.inc-1), p.first)); + } + + for (const auto &p : foo) { + const mds_info_t& info = mds_info.at(p.second); - out << p->second << ":\t" + out << p.second << ":\t" << info.addr << " '" << info.name << "'" << " mds." << info.rank @@ -237,7 +237,7 @@ void MDSMap::print(ostream& out) -void MDSMap::print_summary(Formatter *f, ostream *out) +void MDSMap::print_summary(Formatter *f, ostream *out) const { map by_rank; map by_state; @@ -253,22 +253,20 @@ void MDSMap::print_summary(Formatter *f, ostream *out) if (f) f->open_array_section("by_rank"); - for (map::iterator p = mds_info.begin(); - p != mds_info.end(); - ++p) { - string s = ceph_mds_state_name(p->second.state); - if (p->second.laggy()) + for (const auto &p : mds_info) { + string s = ceph_mds_state_name(p.second.state); + if (p.second.laggy()) s += "(laggy or crashed)"; - if (p->second.rank >= 0) { + if (p.second.rank >= 0) { if (f) { f->open_object_section("mds"); - f->dump_unsigned("rank", p->second.rank); - f->dump_string("name", p->second.name); + f->dump_unsigned("rank", p.second.rank); + f->dump_string("name", p.second.name); f->dump_string("status", s); f->close_section(); } else { - by_rank[p->second.rank] = p->second.name + "=" + s; + by_rank[p.second.rank] = p.second.name + "=" + s; } } else { by_state[s]++; @@ -401,7 +399,7 @@ void MDSMap::get_health(list >& summary, void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const { - ENCODE_START(5, 4, bl); + ENCODE_START(6, 4, bl); ::encode(global_id, bl); ::encode(name, bl); ::encode(rank, bl); @@ -414,6 +412,7 @@ void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) con ::encode(standby_for_name, bl); ::encode(export_targets, bl); ::encode(mds_features, bl); + ::encode(standby_for_ns, bl); ENCODE_FINISH(bl); } @@ -436,7 +435,7 @@ void MDSMap::mds_info_t::encode_unversioned(bufferlist& bl) const void MDSMap::mds_info_t::decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, bl); + DECODE_START_LEGACY_COMPAT_LEN(6, 4, 4, bl); ::decode(global_id, bl); ::decode(name, bl); ::decode(rank, bl); @@ -451,6 +450,9 @@ void MDSMap::mds_info_t::decode(bufferlist::iterator& bl) ::decode(export_targets, bl); if (struct_v >= 5) ::decode(mds_features, bl); + if (struct_v >= 6) { + ::decode(standby_for_ns, bl); + } DECODE_FINISH(bl); } @@ -649,12 +651,12 @@ void MDSMap::decode(bufferlist::iterator& p) MDSMap::availability_t MDSMap::is_cluster_available() const { if (epoch == 0) { - // This is ambiguous between "mds map was never initialized on mons" and - // "we never got an mdsmap from the mons". Treat it like the latter. + // If I'm a client, this means I'm looking at an MDSMap instance + // that was never actually initialized from the mons. Client should + // wait. return TRANSIENT_UNAVAILABLE; } - // If a rank is marked damage (unavailable until operator intervenes) if (damaged.size()) { return STUCK_UNAVAILABLE; @@ -665,25 +667,14 @@ MDSMap::availability_t MDSMap::is_cluster_available() const return STUCK_UNAVAILABLE; } - for (const auto rank : in) { - std::string name; - if (up.count(rank) != 0) { - name = mds_info.at(up.at(rank)).name; - } - const mds_gid_t replacement = find_replacement_for(rank, name, false); - const bool standby_avail = (replacement != MDS_GID_NONE); - - // If the rank is unfilled, and there are no standbys, we're unavailable - if (up.count(rank) == 0 && !standby_avail) { - return STUCK_UNAVAILABLE; - } else if (up.count(rank) && mds_info.at(up.at(rank)).laggy() && !standby_avail) { - // If the daemon is laggy and there are no standbys, we're unavailable. - // It would be nice to give it some grace here, but to do so callers - // would have to poll this time-wise, vs. just waiting for updates - // to mdsmap, so it's not worth the complexity. - return STUCK_UNAVAILABLE; - } - } + for (const auto rank : in) { + if (up.count(rank) && mds_info.at(up.at(rank)).laggy()) { + // This might only be transient, but because we can't see + // standbys, we have no way of knowing whether there is a + // standby available to replace the laggy guy. + return STUCK_UNAVAILABLE; + } +} if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) { // Nobody looks stuck, so indicate to client they should go ahead @@ -694,6 +685,11 @@ MDSMap::availability_t MDSMap::is_cluster_available() const return AVAILABLE; } else { // Nothing indicating we were stuck, but nobody active (yet) - return TRANSIENT_UNAVAILABLE; + //return TRANSIENT_UNAVAILABLE; + + // Because we don't have standbys in the MDSMap any more, we can't + // reliably indicate transient vs. stuck, so always say stuck so + // that the client doesn't block. + return STUCK_UNAVAILABLE; } } diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index 6be2473433ca..c0b38c79f8ec 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -140,11 +140,14 @@ public: utime_t laggy_since; mds_rank_t standby_for_rank; std::string standby_for_name; + fs_cluster_id_t standby_for_ns; std::set export_targets; uint64_t mds_features; mds_info_t() : global_id(MDS_GID_NONE), rank(MDS_RANK_NONE), inc(0), state(STATE_STANDBY), state_seq(0), - standby_for_rank(MDS_NO_STANDBY_PREF) { } + standby_for_rank(MDS_NO_STANDBY_PREF), + standby_for_ns(FS_CLUSTER_ID_NONE) + { } bool laggy() const { return !(laggy_since == utime_t()); } void clear_laggy() { laggy_since = utime_t(); } @@ -216,6 +219,8 @@ public: CompatSet compat; friend class MDSMonitor; + friend class Filesystem; + friend class FSMap; public: MDSMap() @@ -242,12 +247,15 @@ public: return utime_t(session_timeout,0); } uint64_t get_max_filesize() { return max_file_size; } + void set_max_filesize(uint64_t m) { max_file_size = m; } int get_flags() const { return flags; } int test_flag(int f) const { return flags & f; } void set_flag(int f) { flags |= f; } void clear_flag(int f) { flags &= ~f; } + const std::string &get_fs_name() const {return fs_name;} + void set_snaps_allowed() { set_flag(CEPH_MDSMAP_ALLOW_SNAPS); ever_allowed_snaps = true; @@ -277,7 +285,6 @@ public: const std::set &get_data_pools() const { return data_pools; } int64_t get_first_data_pool() const { return *data_pools.begin(); } - int64_t get_cas_pool() const { return cas_pool; } int64_t get_metadata_pool() const { return metadata_pool; } bool is_data_pool(int64_t poolid) const { return data_pools.count(poolid); @@ -287,16 +294,15 @@ public: return get_enabled() && (is_data_pool(poolid) || metadata_pool == poolid); } - const std::map& get_mds_info() { return mds_info; } - const mds_info_t& get_mds_info_gid(mds_gid_t gid) { - assert(mds_info.count(gid)); - return mds_info[gid]; + const std::map& get_mds_info() const { return mds_info; } + const mds_info_t& get_mds_info_gid(mds_gid_t gid) const { + return mds_info.at(gid); } - const mds_info_t& get_mds_info(mds_rank_t m) { - assert(up.count(m) && mds_info.count(up[m])); - return mds_info[up[m]]; + const mds_info_t& get_mds_info(mds_rank_t m) const { + assert(up.count(m) && mds_info.count(up.at(m))); + return mds_info.at(up.at(m)); } - mds_gid_t find_mds_gid_by_name(const std::string& s) { + mds_gid_t find_mds_gid_by_name(const std::string& s) const { for (std::map::const_iterator p = mds_info.begin(); p != mds_info.end(); ++p) { @@ -308,13 +314,13 @@ public: } // counts - unsigned get_num_in_mds() { + unsigned get_num_in_mds() const { return in.size(); } - unsigned get_num_up_mds() { + unsigned get_num_up_mds() const { return up.size(); } - int get_num_failed_mds() { + int get_num_failed_mds() const { return failed.size(); } unsigned get_num_mds(int state) const { @@ -339,19 +345,19 @@ public: } // sets - void get_mds_set(std::set& s) { + void get_mds_set(std::set& s) const { s = in; } - void get_up_mds_set(std::set& s) { + void get_up_mds_set(std::set& s) const { for (std::map::const_iterator p = up.begin(); p != up.end(); ++p) s.insert(p->first); } - void get_active_mds_set(std::set& s) { + void get_active_mds_set(std::set& s) const { get_mds_set(s, MDSMap::STATE_ACTIVE); } - void get_failed_mds_set(std::set& s) { + void get_failed_mds_set(std::set& s) const { s = failed; } @@ -408,7 +414,7 @@ public: if (p->second.state >= STATE_CLIENTREPLAY && p->second.state <= STATE_STOPPING) s.insert(p->second.rank); } - void get_mds_set(std::set& s, DaemonState state) { + void get_mds_set(std::set& s, DaemonState state) const { for (std::map::const_iterator p = mds_info.begin(); p != mds_info.end(); ++p) @@ -416,72 +422,6 @@ public: s.insert(p->second.rank); } - int get_random_up_mds() { - if (up.empty()) - return -1; - std::map::iterator p = up.begin(); - for (int n = rand() % up.size(); n; n--) - ++p; - return p->first; - } - - const mds_info_t* find_by_name(const std::string& name) const { - for (std::map::const_iterator p = mds_info.begin(); - p != mds_info.end(); - ++p) { - if (p->second.name == name) - return &p->second; - } - return NULL; - } - - mds_gid_t find_standby_for(mds_rank_t mds, std::string& name) const { - std::map::const_iterator generic_standby - = mds_info.end(); - for (std::map::const_iterator p = mds_info.begin(); - p != mds_info.end(); - ++p) { - if ((p->second.state != MDSMap::STATE_STANDBY && p->second.state != MDSMap::STATE_STANDBY_REPLAY) || - p->second.laggy() || - p->second.rank >= 0) - continue; - if (p->second.standby_for_rank == mds || (name.length() && p->second.standby_for_name == name)) - return p->first; - if (p->second.standby_for_rank < 0 && p->second.standby_for_name.length() == 0) - generic_standby = p; - } - if (generic_standby != mds_info.end()) - return generic_standby->first; - return MDS_GID_NONE; - } - - mds_gid_t find_unused_for(mds_rank_t mds, std::string& name, - bool force_standby_active) const { - for (std::map::const_iterator p = mds_info.begin(); - p != mds_info.end(); - ++p) { - if (p->second.state != MDSMap::STATE_STANDBY || - p->second.laggy() || - p->second.rank >= 0) - continue; - if ((p->second.standby_for_rank == MDS_NO_STANDBY_PREF || - p->second.standby_for_rank == MDS_MATCHED_ACTIVE || - (p->second.standby_for_rank == MDS_STANDBY_ANY && force_standby_active))) { - return p->first; - } - } - return MDS_GID_NONE; - } - - mds_gid_t find_replacement_for(mds_rank_t mds, std::string& name, - bool force_standby_active) const { - const mds_gid_t standby = find_standby_for(mds, name); - if (standby) - return standby; - else - return find_unused_for(mds, name, force_standby_active); - } - void get_health(list >& summary, list > *detail) const; @@ -542,8 +482,12 @@ public: return i->second.state; } - mds_info_t& get_info(mds_rank_t m) { assert(up.count(m)); return mds_info[up[m]]; } - mds_info_t& get_info_gid(mds_gid_t gid) { assert(mds_info.count(gid)); return mds_info[gid]; } + const mds_info_t& get_info(const mds_rank_t m) { + return mds_info.at(up.at(m)); + } + const mds_info_t& get_info_gid(const mds_gid_t gid) { + return mds_info.at(gid); + } bool is_boot(mds_rank_t m) const { return get_state(m) == STATE_BOOT; } bool is_creating(mds_rank_t m) const { return get_state(m) == STATE_CREATING; } @@ -578,12 +522,9 @@ public: return p->second.laggy(); } - - // cluster states - bool is_full() const { - return mds_rank_t(in.size()) >= max_mds; - } - bool is_degraded() const { // degraded = some recovery in process. fixes active membership and recovery_set. + // degraded = some recovery in process. fixes active membership and + // recovery_set. + bool is_degraded() const { if (!failed.empty() || !damaged.empty()) return true; for (std::map::const_iterator p = mds_info.begin(); @@ -652,17 +593,14 @@ public: return false; } - mds_rank_t get_rank_gid(mds_gid_t gid) { - if (mds_info.count(gid)) - return mds_info[gid].rank; - return MDS_RANK_NONE; + mds_rank_t get_rank_gid(mds_gid_t gid) const { + if (mds_info.count(gid)) { + return mds_info.at(gid).rank; + } else { + return MDS_RANK_NONE; + } } - int get_inc(mds_rank_t m) { - if (up.count(m)) - return mds_info[up[m]].inc; - return 0; - } int get_inc_gid(mds_gid_t gid) { if (mds_info.count(gid)) return mds_info[gid].inc; @@ -676,8 +614,8 @@ public: } - void print(ostream& out); - void print_summary(Formatter *f, ostream *out); + void print(ostream& out) const; + void print_summary(Formatter *f, ostream *out) const; void dump(Formatter *f) const; static void generate_test_instances(list& ls); diff --git a/src/mds/Makefile-server.am b/src/mds/Makefile-server.am index 3cc45f8e8ac5..97378a295e41 100644 --- a/src/mds/Makefile-server.am +++ b/src/mds/Makefile-server.am @@ -31,6 +31,7 @@ noinst_HEADERS += \ mds/MDSContext.h \ mds/MDSAuthCaps.h \ mds/MDSMap.h \ + mds/FSMap.h \ mds/MDSTable.h \ mds/MDSTableServer.h \ mds/MDSTableClient.h \