set(mds_files)
list(APPEND mds_files
mds/MDSMap.cc
+ mds/FSMap.cc
mds/inode_backtrace.cc
mds/mdstypes.cc)
osd/ECMsgTypes.cc \
osd/HitSet.cc \
mds/MDSMap.cc \
+ mds/FSMap.cc \
mds/inode_backtrace.cc \
mds/mdstypes.cc \
mds/flock.cc
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#include "FSMap.h"
+
+#include <sstream>
+using std::stringstream;
+
+
+void Filesystem::dump(Formatter *f) const
+{
+ f->open_object_section("mdsmap");
+ mds_map.dump(f);
+ f->close_section();
+ f->dump_int("id", fscid);
+}
+
+void FSMap::dump(Formatter *f) const
+{
+ f->dump_int("epoch", epoch);
+
+ f->open_object_section("compat");
+ compat.dump(f);
+ f->close_section();
+
+ f->open_array_section("standbys");
+ for (const auto &i : standby_daemons) {
+ f->open_object_section("info");
+ i.second.dump(f);
+ f->dump_int("epoch", standby_epochs.at(i.first));
+ f->close_section();
+ }
+ f->close_section();
+
+ f->open_array_section("filesystems");
+ for (const auto fs : filesystems) {
+ f->open_object_section("filesystem");
+ fs.second->dump(f);
+ f->close_section();
+ }
+ f->close_section();
+}
+
+void FSMap::generate_test_instances(list<FSMap*>& ls)
+{
+ FSMap *m = new FSMap();
+
+ std::list<MDSMap*> mds_map_instances;
+ MDSMap::generate_test_instances(mds_map_instances);
+
+ int k = 20;
+ for (auto i : mds_map_instances) {
+ auto fs = std::make_shared<Filesystem>();
+ fs->fscid = k++;
+ fs->mds_map = *i;
+ delete i;
+ m->filesystems[fs->fscid] = fs;
+ }
+ mds_map_instances.clear();
+
+ ls.push_back(m);
+}
+
+void FSMap::print(ostream& out) const
+{
+ // TODO add a non-json print?
+ JSONFormatter f(true);
+ f.open_object_section("fsmap");
+ dump(&f);
+ f.close_section();
+ f.flush(out);
+}
+
+
+
+void FSMap::print_summary(Formatter *f, ostream *out)
+{
+ map<mds_role_t,string> by_rank;
+ map<string,int> by_state;
+
+ if (f) {
+ f->dump_unsigned("epoch", get_epoch());
+ for (auto i : filesystems) {
+ auto fs = i.second;
+ f->dump_unsigned("id", fs->fscid);
+ f->dump_unsigned("up", fs->mds_map.up.size());
+ f->dump_unsigned("in", fs->mds_map.in.size());
+ f->dump_unsigned("max", fs->mds_map.max_mds);
+ }
+ } else {
+ *out << "e" << get_epoch() << ":";
+ if (filesystems.size() == 1) {
+ auto fs = filesystems.begin()->second;
+ *out << " " << fs->mds_map.up.size() << "/" << fs->mds_map.in.size() << "/"
+ << fs->mds_map.max_mds << " up";
+ } else {
+ for (auto i : filesystems) {
+ auto fs = i.second;
+ *out << " " << fs->mds_map.fs_name << "-" << fs->mds_map.up.size() << "/"
+ << fs->mds_map.in.size() << "/" << fs->mds_map.max_mds << " up";
+ }
+ }
+ }
+
+ if (f) {
+ f->open_array_section("by_rank");
+ }
+
+ const auto all_info = get_mds_info();
+ for (const auto &p : all_info) {
+ const auto &info = p.second;
+ string s = ceph_mds_state_name(info.state);
+ if (info.laggy()) {
+ s += "(laggy or crashed)";
+ }
+
+ const fs_cluster_id_t fscid = mds_roles.at(info.global_id);
+
+ if (info.rank != MDS_RANK_NONE) {
+ if (f) {
+ f->open_object_section("mds");
+ f->dump_unsigned("filesystem_id", fscid);
+ f->dump_unsigned("rank", info.rank);
+ f->dump_string("name", info.name);
+ f->dump_string("status", s);
+ f->close_section();
+ } else {
+ by_rank[mds_role_t(fscid, info.rank)] = info.name + "=" + s;
+ }
+ } else {
+ by_state[s]++;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ } else {
+ if (!by_rank.empty()) {
+ if (filesystems.size() > 1) {
+ // Disambiguate filesystems
+ std::map<std::string, std::string> pretty;
+ for (auto i : by_rank) {
+ const auto &fs_name = filesystems.at(i.first.fscid)->mds_map.fs_name;
+ std::ostringstream o;
+ o << "[" << fs_name << ":" << i.first.rank << "]";
+ pretty[o.str()] = i.second;
+ }
+ *out << " " << pretty;
+ } else {
+ *out << " " << by_rank;
+ }
+ }
+ }
+
+ for (map<string,int>::reverse_iterator p = by_state.rbegin(); p != by_state.rend(); ++p) {
+ if (f) {
+ f->dump_unsigned(p->first.c_str(), p->second);
+ } else {
+ *out << ", " << p->second << " " << p->first;
+ }
+ }
+
+ size_t failed = 0;
+ size_t damaged = 0;
+ for (auto i : filesystems) {
+ auto fs = i.second;
+ failed += fs->mds_map.failed.size();
+ damaged += fs->mds_map.damaged.size();
+ }
+
+ if (failed > 0) {
+ if (f) {
+ f->dump_unsigned("failed", failed);
+ } else {
+ *out << ", " << failed << " failed";
+ }
+ }
+
+ if (damaged > 0) {
+ if (f) {
+ f->dump_unsigned("damaged", damaged);
+ } else {
+ *out << ", " << damaged << " damaged";
+ }
+ }
+ //if (stopped.size())
+ //out << ", " << stopped.size() << " stopped";
+}
+
+void FSMap::get_health(list<pair<health_status_t,string> >& summary,
+ list<pair<health_status_t,string> > *detail) const
+{
+ for (auto i : filesystems) {
+ auto fs = i.second;
+
+ // TODO: move get_health up into here so that we can qualify
+ // all the messages with what filesystem they're talking about
+ fs->mds_map.get_health(summary, detail);
+ }
+}
+
+void FSMap::encode(bufferlist& bl, uint64_t features) const
+{
+ ENCODE_START(10, 10, bl);
+ ::encode(epoch, bl);
+ ::encode(next_filesystem_id, bl);
+ ::encode(legacy_client_fscid, bl);
+ ::encode(compat, bl);
+ ::encode(enable_multiple, bl);
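+  // The filesystems map holds shared_ptrs, but the encoder works on
+  // plain Filesystem values, so flatten the map into a vector first.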
+ std::vector<Filesystem> fs_list;
+ for (auto i : filesystems) {
+ fs_list.push_back(*(i.second));
+ }
+ ::encode(fs_list, bl);
+ ::encode(mds_roles, bl);
+ ::encode(standby_daemons, bl, features);
+ ::encode(standby_epochs, bl);
+ ENCODE_FINISH(bl);
+}
+
+void FSMap::decode(bufferlist::iterator& p)
+{
+ // Because the mon used to store an MDSMap where we now
+ // store an FSMap, FSMap knows how to decode the legacy
+ // MDSMap format (it never needs to encode it though).
+ Filesystem legacy_fs;
+ MDSMap &legacy_mds_map = legacy_fs.mds_map;
+ bool enabled = false;
+
+ DECODE_START_LEGACY_COMPAT_LEN_16(10, 4, 4, p);
+ if (struct_v < 10) {
+ // Decoding an MDSMap (upgrade)
+ ::decode(epoch, p);
+ ::decode(legacy_mds_map.flags, p);
+ ::decode(legacy_mds_map.last_failure, p);
+ ::decode(legacy_mds_map.root, p);
+ ::decode(legacy_mds_map.session_timeout, p);
+ ::decode(legacy_mds_map.session_autoclose, p);
+ ::decode(legacy_mds_map.max_file_size, p);
+ ::decode(legacy_mds_map.max_mds, p);
+ ::decode(legacy_mds_map.mds_info, p);
+ if (struct_v < 3) {
+ __u32 n;
+ ::decode(n, p);
+ while (n--) {
+ __u32 m;
+ ::decode(m, p);
+ legacy_mds_map.data_pools.insert(m);
+ }
+ __s32 s;
+ ::decode(s, p);
+ legacy_mds_map.cas_pool = s;
+ } else {
+ ::decode(legacy_mds_map.data_pools, p);
+ ::decode(legacy_mds_map.cas_pool, p);
+ }
+
+ // kclient ignores everything from here
+ __u16 ev = 1;
+ if (struct_v >= 2)
+ ::decode(ev, p);
+ if (ev >= 3)
+ ::decode(legacy_mds_map.compat, p);
+ else
+ legacy_mds_map.compat = get_mdsmap_compat_set_base();
+ if (ev < 5) {
+ __u32 n;
+ ::decode(n, p);
+ legacy_mds_map.metadata_pool = n;
+ } else {
+ ::decode(legacy_mds_map.metadata_pool, p);
+ }
+ ::decode(legacy_mds_map.tableserver, p);
+ ::decode(legacy_mds_map.in, p);
+ ::decode(legacy_mds_map.inc, p);
+ ::decode(legacy_mds_map.up, p);
+ ::decode(legacy_mds_map.failed, p);
+ ::decode(legacy_mds_map.stopped, p);
+ if (ev >= 4)
+ ::decode(legacy_mds_map.last_failure_osd_epoch, p);
+ if (ev >= 6) {
+ ::decode(legacy_mds_map.ever_allowed_snaps, p);
+ ::decode(legacy_mds_map.explicitly_allowed_snaps, p);
+ } else {
+ legacy_mds_map.ever_allowed_snaps = true;
+ legacy_mds_map.explicitly_allowed_snaps = false;
+ }
+ if (ev >= 7)
+ ::decode(legacy_mds_map.inline_data_enabled, p);
+
+ if (ev >= 8) {
+ assert(struct_v >= 5);
+ ::decode(enabled, p);
+ ::decode(legacy_mds_map.fs_name, p);
+ } else {
+ if (epoch > 1) {
+ // If an MDS has ever been started, epoch will be greater than 1,
+ // assume filesystem is enabled.
+ enabled = true;
+ } else {
+ // Upgrading from a cluster that never used an MDS, switch off
+ // filesystem until it's explicitly enabled.
+ enabled = false;
+ }
+ }
+
+    if (ev >= 9) {
+      ::decode(legacy_mds_map.damaged, p);
+    }
+    legacy_mds_map.enabled = enabled;
+
+    // We're upgrading: populate filesystems from the legacy fields
+    assert(filesystems.empty());
+    auto migrate_fs = std::make_shared<Filesystem>();
+
+    *migrate_fs = legacy_fs;
+    migrate_fs->fscid = FS_CLUSTER_ID_ANONYMOUS;
+    migrate_fs->mds_map.fs_name = "default";
+    filesystems[migrate_fs->fscid] = migrate_fs;
+    // Ranked daemons stay with the migrated filesystem; rank-less
+    // daemons move out into the standby maps.
+    for (auto it = migrate_fs->mds_map.mds_info.begin();
+         it != migrate_fs->mds_map.mds_info.end(); ) {
+      if (it->second.rank == MDS_RANK_NONE) {
+        standby_daemons[it->first] = it->second;
+        standby_epochs[it->first] = epoch;
+        mds_roles[it->first] = FS_CLUSTER_ID_NONE;
+        it = migrate_fs->mds_map.mds_info.erase(it);
+      } else {
+        mds_roles[it->first] = migrate_fs->fscid;
+        ++it;
+      }
+    }
+    legacy_client_fscid = migrate_fs->fscid;
+    compat = migrate_fs->mds_map.compat;
+    enable_multiple = false;
+ } else {
+ ::decode(epoch, p);
+ ::decode(next_filesystem_id, p);
+ ::decode(legacy_client_fscid, p);
+ ::decode(compat, p);
+ ::decode(enable_multiple, p);
+ std::vector<Filesystem> fs_list;
+ ::decode(fs_list, p);
+ filesystems.clear();
+ for (std::vector<Filesystem>::const_iterator fs = fs_list.begin(); fs != fs_list.end(); ++fs) {
+ filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
+ }
+
+ ::decode(mds_roles, p);
+ ::decode(standby_daemons, p);
+ ::decode(standby_epochs, p);
+ }
+
+ DECODE_FINISH(p);
+}
+
+
+void Filesystem::encode(bufferlist& bl) const
+{
+ ENCODE_START(1, 1, bl);
+ ::encode(fscid, bl);
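+  // Encode the MDSMap into its own bufferlist so it is stored
+  // length-prefixed within the Filesystem encoding.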
+ bufferlist mdsmap_bl;
+ mds_map.encode(mdsmap_bl, CEPH_FEATURE_PGID64 | CEPH_FEATURE_MDSENC);
+ ::encode(mdsmap_bl, bl);
+ ENCODE_FINISH(bl);
+}
+
+void Filesystem::decode(bufferlist::iterator& p)
+{
+ DECODE_START(1, p);
+ ::decode(fscid, p);
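+  // The MDSMap was encoded as a nested bufferlist; unwrap it and decode.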
+ bufferlist mdsmap_bl;
+ ::decode(mdsmap_bl, p);
+ bufferlist::iterator mdsmap_bl_iter = mdsmap_bl.begin();
+ mds_map.decode(mdsmap_bl_iter);
+ DECODE_FINISH(p);
+}
+
+int FSMap::parse_filesystem(
+ std::string const &ns_str,
+ std::shared_ptr<Filesystem> *result
+ ) const
+{
+ std::string ns_err;
+ fs_cluster_id_t fscid = strict_strtol(ns_str.c_str(), 10, &ns_err);
+ if (!ns_err.empty() || filesystems.count(fscid) == 0) {
+ for (auto fs : filesystems) {
+ if (fs.second->mds_map.fs_name == ns_str) {
+ *result = fs.second;
+ return 0;
+ }
+ }
+ return -ENOENT;
+ } else {
+ *result = get_filesystem(fscid);
+ return 0;
+ }
+}
+
+void Filesystem::print(std::ostream &out) const
+{
+ // TODO add a non-json print?
+ JSONFormatter f;
+  f.open_object_section("filesystem");
+  dump(&f);
+  f.close_section();
+ f.flush(out);
+}
+
+mds_gid_t FSMap::find_standby_for(mds_role_t role, const std::string& name) const
+{
+ mds_gid_t result = MDS_GID_NONE;
+
+ // First see if we have a STANDBY_REPLAY
+ auto fs = get_filesystem(role.fscid);
+ for (const auto &i : fs->mds_map.mds_info) {
+ const auto &info = i.second;
+ if (info.rank == role.rank && info.state == MDSMap::STATE_STANDBY_REPLAY) {
+ return info.global_id;
+ }
+ }
+
+ // See if there are any STANDBY daemons available
+ for (const auto &i : standby_daemons) {
+ const auto &gid = i.first;
+ const auto &info = i.second;
+ assert(info.state == MDSMap::STATE_STANDBY);
+ assert(info.rank == MDS_RANK_NONE);
+
+ if (info.laggy()) {
+ continue;
+ }
+
+ if ((info.standby_for_rank == role.rank && info.standby_for_ns == role.fscid)
+ || (name.length() && info.standby_for_name == name)) {
+ // It's a named standby for *me*, use it.
+ return gid;
+    } else if (info.standby_for_rank < 0 && info.standby_for_name.length() == 0) {
+      // It's not a named standby for anyone, use it if we don't find
+      // a named standby for me later.
+      result = gid;
+    }
+ }
+
+ return result;
+}
+
+mds_gid_t FSMap::find_unused(bool force_standby_active) const {
+ for (const auto &i : standby_daemons) {
+ const auto &gid = i.first;
+ const auto &info = i.second;
+ assert(info.state == MDSMap::STATE_STANDBY);
+
+ if (info.laggy() || info.rank >= 0)
+ continue;
+
+ if ((info.standby_for_rank == MDSMap::MDS_NO_STANDBY_PREF ||
+ info.standby_for_rank == MDSMap::MDS_MATCHED_ACTIVE ||
+ (info.standby_for_rank == MDSMap::MDS_STANDBY_ANY
+ && force_standby_active))) {
+ return gid;
+ }
+ }
+ return MDS_GID_NONE;
+}
+
+mds_gid_t FSMap::find_replacement_for(mds_role_t role, const std::string& name,
+ bool force_standby_active) const {
+ const mds_gid_t standby = find_standby_for(role, name);
+ if (standby)
+ return standby;
+ else
+ return find_unused(force_standby_active);
+}
+
+void FSMap::sanity() const
+{
+ if (legacy_client_fscid != FS_CLUSTER_ID_NONE) {
+ assert(filesystems.count(legacy_client_fscid) == 1);
+ }
+
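+  // Each Filesystem's daemons must be indexed in mds_roles, must not
+  // appear in the standby maps, and every up rank must point back at a
+  // daemon that exists.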
+ for (const auto &i : filesystems) {
+ auto fs = i.second;
+ assert(fs->mds_map.compat.compare(compat) == 0);
+ assert(fs->fscid == i.first);
+ for (const auto &j : fs->mds_map.mds_info) {
+ assert(j.second.rank != MDS_RANK_NONE);
+ assert(mds_roles.count(j.first) == 1);
+ assert(standby_daemons.count(j.first) == 0);
+ assert(standby_epochs.count(j.first) == 0);
+ assert(mds_roles.at(j.first) == i.first);
+ if (j.second.state != MDSMap::STATE_STANDBY_REPLAY) {
+ assert(fs->mds_map.up.at(j.second.rank) == j.first);
+ assert(fs->mds_map.failed.count(j.second.rank) == 0);
+ assert(fs->mds_map.damaged.count(j.second.rank) == 0);
+ }
+ }
+
+ for (const auto &j : fs->mds_map.up) {
+ mds_rank_t rank = j.first;
+ assert(fs->mds_map.in.count(rank) == 1);
+ mds_gid_t gid = j.second;
+ assert(fs->mds_map.mds_info.count(gid) == 1);
+ }
+ }
+
+ for (const auto &i : standby_daemons) {
+ assert(i.second.state == MDSMap::STATE_STANDBY);
+ assert(i.second.rank == MDS_RANK_NONE);
+ assert(i.second.global_id == i.first);
+ assert(standby_epochs.count(i.first) == 1);
+ assert(mds_roles.count(i.first) == 1);
+ assert(mds_roles.at(i.first) == FS_CLUSTER_ID_NONE);
+ }
+
+ for (const auto &i : standby_epochs) {
+ assert(standby_daemons.count(i.first) == 1);
+ }
+
+ for (const auto &i : mds_roles) {
+ if (i.second == FS_CLUSTER_ID_NONE) {
+ assert(standby_daemons.count(i.first) == 1);
+ } else {
+ assert(filesystems.count(i.second) == 1);
+ assert(filesystems.at(i.second)->mds_map.mds_info.count(i.first) == 1);
+ }
+ }
+}
+
+void FSMap::promote(
+ mds_gid_t standby_gid,
+ std::shared_ptr<Filesystem> filesystem,
+ mds_rank_t assigned_rank)
+{
+ assert(gid_exists(standby_gid));
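+  // A gid that is already assigned to a filesystem can only be its
+  // standby-replay daemon being promoted in place; anything else must
+  // come from the standby pool.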
+ bool is_standby_replay = mds_roles.at(standby_gid) != FS_CLUSTER_ID_NONE;
+ if (!is_standby_replay) {
+ assert(standby_daemons.count(standby_gid));
+ assert(standby_daemons.at(standby_gid).state == MDSMap::STATE_STANDBY);
+ }
+
+ MDSMap &mds_map = filesystem->mds_map;
+
+ // Insert daemon state to Filesystem
+ if (!is_standby_replay) {
+ mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid);
+ } else {
+ assert(mds_map.mds_info.count(standby_gid));
+ assert(mds_map.mds_info.at(standby_gid).state == MDSMap::STATE_STANDBY_REPLAY);
+ assert(mds_map.mds_info.at(standby_gid).rank == assigned_rank);
+ }
+ MDSMap::mds_info_t &info = mds_map.mds_info[standby_gid];
+
+ if (mds_map.stopped.count(assigned_rank)) {
+ // The cluster is being expanded with a stopped rank
+ info.state = MDSMap::STATE_STARTING;
+ mds_map.stopped.erase(assigned_rank);
+ } else if (!mds_map.is_in(assigned_rank)) {
+ // The cluster is being expanded with a new rank
+ info.state = MDSMap::STATE_CREATING;
+ } else {
+ // An existing rank is being assigned to a replacement
+ info.state = MDSMap::STATE_REPLAY;
+ mds_map.failed.erase(assigned_rank);
+ }
+ info.rank = assigned_rank;
+ info.inc = ++mds_map.inc[assigned_rank];
+ mds_roles[standby_gid] = filesystem->fscid;
+
+ // Update the rank state in Filesystem
+ mds_map.in.insert(assigned_rank);
+ mds_map.up[assigned_rank] = standby_gid;
+
+ // Remove from the list of standbys
+ if (!is_standby_replay) {
+ standby_daemons.erase(standby_gid);
+ standby_epochs.erase(standby_gid);
+ }
+
+ // Indicate that Filesystem has been modified
+ mds_map.epoch = epoch;
+}
+
+void FSMap::assign_standby_replay(
+ const mds_gid_t standby_gid,
+ const fs_cluster_id_t leader_ns,
+ const mds_rank_t leader_rank)
+{
+ assert(mds_roles.at(standby_gid) == FS_CLUSTER_ID_NONE);
+ assert(gid_exists(standby_gid));
+ assert(!gid_has_rank(standby_gid));
+ assert(standby_daemons.count(standby_gid));
+
+ // Insert to the filesystem
+ auto fs = filesystems.at(leader_ns);
+ fs->mds_map.mds_info[standby_gid] = standby_daemons.at(standby_gid);
+ fs->mds_map.mds_info[standby_gid].rank = leader_rank;
+ fs->mds_map.mds_info[standby_gid].state = MDSMap::STATE_STANDBY_REPLAY;
+ mds_roles[standby_gid] = leader_ns;
+
+ // Remove from the list of standbys
+ standby_daemons.erase(standby_gid);
+ standby_epochs.erase(standby_gid);
+
+ // Indicate that Filesystem has been modified
+ fs->mds_map.epoch = epoch;
+}
+
+void FSMap::erase(mds_gid_t who, epoch_t blacklist_epoch)
+{
+ if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) {
+ standby_daemons.erase(who);
+ standby_epochs.erase(who);
+ } else {
+ auto fs = filesystems.at(mds_roles.at(who));
+ const auto &info = fs->mds_map.mds_info.at(who);
+ if (info.state != MDSMap::STATE_STANDBY_REPLAY) {
+ if (info.state == MDSMap::STATE_CREATING) {
+ // If this gid didn't make it past CREATING, then forget
+ // the rank ever existed so that next time it's handed out
+ // to a gid it'll go back into CREATING.
+ fs->mds_map.in.erase(info.rank);
+ } else {
+ // Put this rank into the failed list so that the next available
+ // STANDBY will pick it up.
+ fs->mds_map.failed.insert(info.rank);
+ }
+ assert(fs->mds_map.up.at(info.rank) == info.global_id);
+ fs->mds_map.up.erase(info.rank);
+ }
+ fs->mds_map.mds_info.erase(who);
+ fs->mds_map.last_failure_osd_epoch = blacklist_epoch;
+ fs->mds_map.epoch = epoch;
+ }
+
+ mds_roles.erase(who);
+}
+
+void FSMap::damaged(mds_gid_t who, epoch_t blacklist_epoch)
+{
+ assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE);
+ auto fs = filesystems.at(mds_roles.at(who));
+ mds_rank_t rank = fs->mds_map.mds_info[who].rank;
+
+ erase(who, blacklist_epoch);
+ fs->mds_map.failed.erase(rank);
+ fs->mds_map.damaged.insert(rank);
+
+ assert(fs->mds_map.epoch == epoch);
+}
+
+/**
+ * Update to indicate that the rank `rank` is to be removed
+ * from the damaged list of the filesystem `fscid`
+ */
+bool FSMap::undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank)
+{
+ auto fs = filesystems.at(fscid);
+
+ if (fs->mds_map.damaged.count(rank)) {
+ fs->mds_map.damaged.erase(rank);
+ fs->mds_map.failed.insert(rank);
+ fs->mds_map.epoch = epoch;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+void FSMap::insert(const MDSMap::mds_info_t &new_info)
+{
+ mds_roles[new_info.global_id] = FS_CLUSTER_ID_NONE;
+ standby_daemons[new_info.global_id] = new_info;
+ standby_epochs[new_info.global_id] = epoch;
+}
+
+void FSMap::stop(mds_gid_t who)
+{
+ assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE);
+ auto fs = filesystems.at(mds_roles.at(who));
+ const auto &info = fs->mds_map.mds_info.at(who);
+ fs->mds_map.up.erase(info.rank);
+ fs->mds_map.in.erase(info.rank);
+ fs->mds_map.stopped.insert(info.rank);
+
+ fs->mds_map.mds_info.erase(who);
+ mds_roles.erase(who);
+
+ fs->mds_map.epoch = epoch;
+}
+
+
+/**
+ * Given one of the following forms:
+ * <fs name>:<rank>
+ * <fs id>:<rank>
+ * <rank>
+ *
+ * Parse into an mds_role_t. The rank-only form is only valid
+ * if legacy_client_fscid is set.
+ */
+int FSMap::parse_role(
+ const std::string &role_str,
+ mds_role_t *role,
+ std::ostream &ss) const
+{
+ auto colon_pos = role_str.find(":");
+
+ if (colon_pos != std::string::npos && colon_pos != role_str.size()) {
+ auto fs_part = role_str.substr(0, colon_pos);
+ auto rank_part = role_str.substr(colon_pos + 1);
+
+ std::string err;
+ fs_cluster_id_t fs_id = FS_CLUSTER_ID_NONE;
+ long fs_id_i = strict_strtol(fs_part.c_str(), 10, &err);
+ if (fs_id_i < 0 || !err.empty()) {
+ // Try resolving as name
+ auto fs = get_filesystem(fs_part);
+ if (fs == nullptr) {
+ ss << "Unknown filesystem name '" << fs_part << "'";
+ return -EINVAL;
+ } else {
+ fs_id = fs->fscid;
+ }
+ } else {
+ fs_id = fs_id_i;
+ }
+
+ mds_rank_t rank;
+ long rank_i = strict_strtol(rank_part.c_str(), 10, &err);
+ if (rank_i < 0 || !err.empty()) {
+ ss << "Invalid rank '" << rank_part << "'";
+ return -EINVAL;
+ } else {
+ rank = rank_i;
+ }
+
+ *role = {fs_id, rank};
+ } else {
+ std::string err;
+ long who_i = strict_strtol(role_str.c_str(), 10, &err);
+ if (who_i < 0 || !err.empty()) {
+ ss << "Invalid rank '" << role_str << "'";
+ return -EINVAL;
+ }
+
+ if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
+ ss << "No filesystem selected";
+ return -ENOENT;
+ } else {
+ *role = mds_role_t(legacy_client_fscid, who_i);
+ }
+ }
+
+ // Now check that the role actually exists
+  if (!filesystem_exists(role->fscid)) {
+ ss << "Filesystem with ID '" << role->fscid << "' not found";
+ return -ENOENT;
+ }
+
+ auto fs = get_filesystem(role->fscid);
+ if (fs->mds_map.in.count(role->rank) == 0) {
+ ss << "Rank '" << role->rank << "' not found";
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef CEPH_FSMAP_H
+#define CEPH_FSMAP_H
+
+#include <errno.h>
+
+#include "include/types.h"
+#include "common/Clock.h"
+#include "msg/Message.h"
+#include "mds/MDSMap.h"
+
+#include <set>
+#include <map>
+#include <string>
+
+#include "common/config.h"
+
+#include "include/CompatSet.h"
+#include "include/ceph_features.h"
+#include "common/Formatter.h"
+#include "mds/mdstypes.h"
+
+class CephContext;
+
+#define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20")
+#define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges")
+#define MDS_FEATURE_INCOMPAT_FILELAYOUT CompatSet::Feature(3, "default file layouts on dirs")
+#define MDS_FEATURE_INCOMPAT_DIRINODE CompatSet::Feature(4, "dir inode in separate object")
+#define MDS_FEATURE_INCOMPAT_ENCODING CompatSet::Feature(5, "mds uses versioned encoding")
+#define MDS_FEATURE_INCOMPAT_OMAPDIRFRAG CompatSet::Feature(6, "dirfrag is stored in omap")
+#define MDS_FEATURE_INCOMPAT_INLINE CompatSet::Feature(7, "mds uses inline data")
+#define MDS_FEATURE_INCOMPAT_NOANCHOR CompatSet::Feature(8, "no anchor table")
+
+#define MDS_FS_NAME_DEFAULT "cephfs"
+
+/**
+ * The MDSMap and any additional fields describing a particular
+ * filesystem (a unique fs_cluster_id_t).
+ */
+class Filesystem
+{
+ public:
+ fs_cluster_id_t fscid;
+ MDSMap mds_map;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& p);
+
+ Filesystem()
+ :
+ fscid(FS_CLUSTER_ID_NONE)
+ {
+ }
+
+ void dump(Formatter *f) const;
+ void print(std::ostream& out) const;
+
+ /**
+ * Return true if a daemon is already assigned as
+ * STANDBY_REPLAY for the gid `who`
+ */
+ bool has_standby_replay(mds_gid_t who) const
+ {
+ for (const auto &i : mds_map.mds_info) {
+ const auto &info = i.second;
+ if (info.state == MDSMap::STATE_STANDBY_REPLAY
+ && info.rank == mds_map.mds_info.at(who).rank) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+};
+WRITE_CLASS_ENCODER(Filesystem)
+
+class FSMap {
+protected:
+ epoch_t epoch;
+ uint64_t next_filesystem_id;
+ fs_cluster_id_t legacy_client_fscid;
+ CompatSet compat;
+ bool enable_multiple;
+
+ std::map<fs_cluster_id_t, std::shared_ptr<Filesystem> > filesystems;
+
+ // Remember which Filesystem an MDS daemon's info is stored in
+ // (or in standby_daemons for FS_CLUSTER_ID_NONE)
+ std::map<mds_gid_t, fs_cluster_id_t> mds_roles;
+
+ // For MDS daemons not yet assigned to a Filesystem
+ std::map<mds_gid_t, MDSMap::mds_info_t> standby_daemons;
+ std::map<mds_gid_t, epoch_t> standby_epochs;
+
+public:
+
+ friend class MDSMonitor;
+
+ FSMap()
+ : epoch(0),
+ next_filesystem_id(FS_CLUSTER_ID_ANONYMOUS + 1),
+ legacy_client_fscid(FS_CLUSTER_ID_NONE),
+ compat(get_mdsmap_compat_set_default()),
+ enable_multiple(false)
+ { }
+
+ FSMap(const FSMap &rhs)
+ :
+ epoch(rhs.epoch),
+ next_filesystem_id(rhs.next_filesystem_id),
+ legacy_client_fscid(rhs.legacy_client_fscid),
+ compat(rhs.compat),
+ enable_multiple(rhs.enable_multiple),
+ mds_roles(rhs.mds_roles),
+ standby_daemons(rhs.standby_daemons),
+ standby_epochs(rhs.standby_epochs)
+ {
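+    // Deep-copy the Filesystem objects so the new map does not share
+    // mutable state with rhs.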
+ for (auto &i : rhs.filesystems) {
+ auto fs = i.second;
+ filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
+ }
+ }
+
+ FSMap &operator=(const FSMap &rhs)
+ {
+    if (this == &rhs) {
+      return *this;
+    }
+
+    epoch = rhs.epoch;
+    next_filesystem_id = rhs.next_filesystem_id;
+    legacy_client_fscid = rhs.legacy_client_fscid;
+    compat = rhs.compat;
+    enable_multiple = rhs.enable_multiple;
+    mds_roles = rhs.mds_roles;
+    standby_daemons = rhs.standby_daemons;
+    standby_epochs = rhs.standby_epochs;
+
+    filesystems.clear();
+    for (auto &i : rhs.filesystems) {
+ auto fs = i.second;
+ filesystems[fs->fscid] = std::make_shared<Filesystem>(*fs);
+ }
+
+ return *this;
+ }
+
+ const CompatSet &get_compat() const {return compat;}
+
+ void set_enable_multiple(const bool v)
+ {
+ enable_multiple = v;
+ }
+
+ bool get_enable_multiple() const
+ {
+ return enable_multiple;
+ }
+
+ /**
+ * Get state of all daemons (for all filesystems, including all standbys)
+ */
+ std::map<mds_gid_t, MDSMap::mds_info_t> get_mds_info() const
+ {
+ std::map<mds_gid_t, MDSMap::mds_info_t> result;
+ for (const auto &i : standby_daemons) {
+ result[i.first] = i.second;
+ }
+
+ for (const auto &i : filesystems) {
+ auto fs_info = i.second->mds_map.get_mds_info();
+ for (auto j : fs_info) {
+ result[j.first] = j.second;
+ }
+ }
+
+ return result;
+ }
+
+ /**
+ * Resolve daemon name to GID
+ */
+ mds_gid_t find_mds_gid_by_name(const std::string& s) const
+ {
+ const auto info = get_mds_info();
+ for (const auto &p : info) {
+ if (p.second.name == s) {
+ return p.first;
+ }
+ }
+ return MDS_GID_NONE;
+ }
+
+ /**
+ * Resolve daemon name to status
+ */
+ const MDSMap::mds_info_t* find_by_name(const std::string& name) const
+ {
+    for (const auto &i : standby_daemons) {
+ if (i.second.name == name) {
+ return &(i.second);
+ }
+ }
+
+ for (const auto &i : filesystems) {
+ const auto &fs_info = i.second->mds_map.get_mds_info();
+ for (const auto &j : fs_info) {
+ if (j.second.name == name) {
+ return &(j.second);
+ }
+ }
+ }
+
+ return nullptr;
+ }
+
+ /**
+ * Does a daemon exist with this GID?
+ */
+ bool gid_exists(mds_gid_t gid) const
+ {
+ return mds_roles.count(gid) > 0;
+ }
+
+ /**
+ * Does a daemon with this GID exist, *and* have an MDS rank assigned?
+ */
+ bool gid_has_rank(mds_gid_t gid) const
+ {
+ return gid_exists(gid) && mds_roles.at(gid) != FS_CLUSTER_ID_NONE;
+ }
+
+ /**
+ * Insert a new MDS daemon, as a standby
+ */
+ void insert(const MDSMap::mds_info_t &new_info);
+
+ /**
+ * Assign an MDS cluster standby replay rank to a standby daemon
+ */
+ void assign_standby_replay(
+ const mds_gid_t standby_gid,
+ const fs_cluster_id_t leader_ns,
+ const mds_rank_t leader_rank);
+
+ /**
+ * Assign an MDS cluster rank to a standby daemon
+ */
+ void promote(
+ mds_gid_t standby_gid,
+ std::shared_ptr<Filesystem> filesystem,
+ mds_rank_t assigned_rank);
+
+ /**
+ * A daemon reports that it is STATE_STOPPED: remove it,
+ * and the rank it held.
+ */
+ void stop(mds_gid_t who);
+
+ /**
+ * The rank held by 'who', if any, is to be relinquished, and
+ * the state for the daemon GID is to be forgotten.
+ */
+ void erase(mds_gid_t who, epoch_t blacklist_epoch);
+
+ /**
+ * Update to indicate that the rank held by 'who' is damaged
+ */
+ void damaged(mds_gid_t who, epoch_t blacklist_epoch);
+
+ /**
+ * Update to indicate that the rank `rank` is to be removed
+ * from the damaged list of the filesystem `fscid`
+ */
+ bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank);
+
+ /**
+ * Mutator helper for Filesystem objects: expose a non-const
+ * Filesystem pointer to `fn` and update epochs appropriately.
+ */
+ void modify_filesystem(
+ const fs_cluster_id_t fscid,
+ std::function<void(std::shared_ptr<Filesystem> )> fn)
+ {
+ auto fs = filesystems.at(fscid);
+ fn(fs);
+ fs->mds_map.epoch = epoch;
+ }
+
+ /**
+ * Apply a mutation to the mds_info_t structure for a particular
+ * daemon (identified by GID), and make appropriate updates to epochs.
+ */
+ void modify_daemon(
+ mds_gid_t who,
+ std::function<void(MDSMap::mds_info_t *info)> fn)
+ {
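+    // Standby daemons are tracked outside any Filesystem, so bump
+    // whichever epoch record covers this gid.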
+ if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) {
+ fn(&standby_daemons.at(who));
+ standby_epochs[who] = epoch;
+ } else {
+ auto fs = filesystems[mds_roles.at(who)];
+ auto &info = fs->mds_map.mds_info.at(who);
+ fn(&info);
+
+ fs->mds_map.epoch = epoch;
+ }
+ }
+
+ /**
+ * Given that gid exists in a filesystem or as a standby, return
+ * a reference to its info.
+ */
+ const MDSMap::mds_info_t& get_info_gid(mds_gid_t gid) const
+ {
+ auto fscid = mds_roles.at(gid);
+ if (fscid == FS_CLUSTER_ID_NONE) {
+ return standby_daemons.at(gid);
+ } else {
+ return filesystems.at(fscid)->mds_map.mds_info.at(gid);
+ }
+ }
+
+ /**
+   * A daemon has told us its compat set, and it's too new
+ * for the one we had previously. Impose the new one
+ * on all filesystems.
+ */
+ void update_compat(CompatSet c)
+ {
+ // We could do something more complicated here to enable
+ // different filesystems to be served by different MDS versions,
+ // but this is a lot simpler because it doesn't require us to
+ // track the compat versions for standby daemons.
+ compat = c;
+ for (auto i : filesystems) {
+ MDSMap &mds_map = i.second->mds_map;
+ mds_map.compat = c;
+ mds_map.epoch = epoch;
+ }
+ }
+
+ std::shared_ptr<const Filesystem> get_legacy_filesystem()
+ {
+ if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
+ return nullptr;
+ } else {
+ return filesystems.at(legacy_client_fscid);
+ }
+ }
+
+ /**
+ * A daemon has informed us of its offload targets
+ */
+ void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> targets)
+ {
+ auto fscid = mds_roles.at(who);
+ modify_filesystem(fscid, [who, &targets](std::shared_ptr<Filesystem> fs) {
+ fs->mds_map.mds_info.at(who).export_targets = targets;
+ });
+ }
+
+ const std::map<fs_cluster_id_t, std::shared_ptr<Filesystem> > &get_filesystems() const
+ {
+ return filesystems;
+ }
+ bool any_filesystems() const {return !filesystems.empty(); }
+ bool filesystem_exists(fs_cluster_id_t fscid) const
+ {return filesystems.count(fscid) > 0;}
+
+ epoch_t get_epoch() const { return epoch; }
+ void inc_epoch() { epoch++; }
+
+ std::shared_ptr<Filesystem> get_filesystem(fs_cluster_id_t fscid) const
+ {
+ return filesystems.at(fscid);
+ }
+
+ int parse_filesystem(
+ std::string const &ns_str,
+ std::shared_ptr<Filesystem> *result
+ ) const;
+
+ int parse_role(
+ const std::string &role_str,
+ mds_role_t *role,
+ std::ostream &ss) const;
+
+ /**
+ * Return true if this pool is in use by any of the filesystems
+ */
+ bool pool_in_use(int64_t poolid) const {
+ for (auto const &i : filesystems) {
+ if (i.second->mds_map.is_data_pool(poolid)
+ || i.second->mds_map.metadata_pool == poolid) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ mds_gid_t find_standby_for(mds_role_t mds, const std::string& name) const;
+
+ mds_gid_t find_unused(bool force_standby_active) const;
+
+ mds_gid_t find_replacement_for(mds_role_t mds, const std::string& name,
+ bool force_standby_active) const;
+
+ void get_health(list<pair<health_status_t,std::string> >& summary,
+ list<pair<health_status_t,std::string> > *detail) const;
+
+ std::shared_ptr<const Filesystem> get_filesystem(const std::string &name) const
+ {
+ for (auto &i : filesystems) {
+ if (i.second->mds_map.fs_name == name) {
+ return i.second;
+ }
+ }
+
+ return nullptr;
+ }
+
+ /**
+ * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
+ * all self-consistent.
+ */
+ void sanity() const;
+
+ void encode(bufferlist& bl, uint64_t features) const;
+ void decode(bufferlist::iterator& p);
+ void decode(bufferlist& bl) {
+ bufferlist::iterator p = bl.begin();
+ decode(p);
+ }
+
+ void print(ostream& out) const;
+ void print_summary(Formatter *f, ostream *out);
+
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<FSMap*>& ls);
+};
+WRITE_CLASS_ENCODER_FEATURES(FSMap)
+
+inline ostream& operator<<(ostream& out, FSMap& m) {
+ m.print_summary(NULL, &out);
+ return out;
+}
+
+#endif
f->dump_stream("laggy_since") << laggy_since;
f->dump_int("standby_for_rank", standby_for_rank);
+ f->dump_int("standby_for_ns", standby_for_ns);
f->dump_string("standby_for_name", standby_for_name);
f->open_array_section("export_targets");
for (set<mds_rank_t>::iterator p = export_targets.begin();
ls.push_back(m);
}
-void MDSMap::print(ostream& out)
+void MDSMap::print(ostream& out) const
{
+ out << "fs_name\t" << fs_name << "\n";
out << "epoch\t" << epoch << "\n";
out << "flags\t" << hex << flags << dec << "\n";
out << "created\t" << created << "\n";
out << "inline_data\t" << (inline_data_enabled ? "enabled" : "disabled") << "\n";
multimap< pair<mds_rank_t, unsigned>, mds_gid_t > foo;
- for (map<mds_gid_t,mds_info_t>::iterator p = mds_info.begin();
- p != mds_info.end();
- ++p)
- foo.insert(std::make_pair(std::make_pair(p->second.rank, p->second.inc-1), p->first));
-
- for (multimap< pair<mds_rank_t, unsigned>, mds_gid_t >::iterator p = foo.begin();
- p != foo.end();
- ++p) {
- mds_info_t& info = mds_info[p->second];
+ for (const auto &p : mds_info) {
+ foo.insert(std::make_pair(
+ std::make_pair(p.second.rank, p.second.inc-1), p.first));
+ }
+
+ for (const auto &p : foo) {
+ const mds_info_t& info = mds_info.at(p.second);
- out << p->second << ":\t"
+ out << p.second << ":\t"
<< info.addr
<< " '" << info.name << "'"
<< " mds." << info.rank
-void MDSMap::print_summary(Formatter *f, ostream *out)
+void MDSMap::print_summary(Formatter *f, ostream *out) const
{
map<mds_rank_t,string> by_rank;
map<string,int> by_state;
if (f)
f->open_array_section("by_rank");
- for (map<mds_gid_t,mds_info_t>::iterator p = mds_info.begin();
- p != mds_info.end();
- ++p) {
- string s = ceph_mds_state_name(p->second.state);
- if (p->second.laggy())
+ for (const auto &p : mds_info) {
+ string s = ceph_mds_state_name(p.second.state);
+ if (p.second.laggy())
s += "(laggy or crashed)";
- if (p->second.rank >= 0) {
+ if (p.second.rank >= 0) {
if (f) {
f->open_object_section("mds");
- f->dump_unsigned("rank", p->second.rank);
- f->dump_string("name", p->second.name);
+ f->dump_unsigned("rank", p.second.rank);
+ f->dump_string("name", p.second.name);
f->dump_string("status", s);
f->close_section();
} else {
- by_rank[p->second.rank] = p->second.name + "=" + s;
+ by_rank[p.second.rank] = p.second.name + "=" + s;
}
} else {
by_state[s]++;
void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const
{
- ENCODE_START(5, 4, bl);
+ ENCODE_START(6, 4, bl);
::encode(global_id, bl);
::encode(name, bl);
::encode(rank, bl);
::encode(standby_for_name, bl);
::encode(export_targets, bl);
::encode(mds_features, bl);
+ ::encode(standby_for_ns, bl);
ENCODE_FINISH(bl);
}
void MDSMap::mds_info_t::decode(bufferlist::iterator& bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(6, 4, 4, bl);
::decode(global_id, bl);
::decode(name, bl);
::decode(rank, bl);
::decode(export_targets, bl);
if (struct_v >= 5)
::decode(mds_features, bl);
+ if (struct_v >= 6) {
+ ::decode(standby_for_ns, bl);
+ }
DECODE_FINISH(bl);
}
MDSMap::availability_t MDSMap::is_cluster_available() const
{
if (epoch == 0) {
- // This is ambiguous between "mds map was never initialized on mons" and
- // "we never got an mdsmap from the mons". Treat it like the latter.
+ // If I'm a client, this means I'm looking at an MDSMap instance
+ // that was never actually initialized from the mons. Client should
+ // wait.
return TRANSIENT_UNAVAILABLE;
}
-
// If a rank is marked damage (unavailable until operator intervenes)
if (damaged.size()) {
return STUCK_UNAVAILABLE;
return STUCK_UNAVAILABLE;
}
- for (const auto rank : in) {
- std::string name;
- if (up.count(rank) != 0) {
- name = mds_info.at(up.at(rank)).name;
- }
- const mds_gid_t replacement = find_replacement_for(rank, name, false);
- const bool standby_avail = (replacement != MDS_GID_NONE);
-
- // If the rank is unfilled, and there are no standbys, we're unavailable
- if (up.count(rank) == 0 && !standby_avail) {
- return STUCK_UNAVAILABLE;
- } else if (up.count(rank) && mds_info.at(up.at(rank)).laggy() && !standby_avail) {
- // If the daemon is laggy and there are no standbys, we're unavailable.
- // It would be nice to give it some grace here, but to do so callers
- // would have to poll this time-wise, vs. just waiting for updates
- // to mdsmap, so it's not worth the complexity.
- return STUCK_UNAVAILABLE;
- }
- }
+ for (const auto rank : in) {
+ if (up.count(rank) && mds_info.at(up.at(rank)).laggy()) {
+ // This might only be transient, but because we can't see
+ // standbys, we have no way of knowing whether there is a
+ // standby available to replace the laggy guy.
+ return STUCK_UNAVAILABLE;
+ }
+  }
if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
// Nobody looks stuck, so indicate to client they should go ahead
return AVAILABLE;
} else {
// Nothing indicating we were stuck, but nobody active (yet)
- return TRANSIENT_UNAVAILABLE;
+ //return TRANSIENT_UNAVAILABLE;
+
+ // Because we don't have standbys in the MDSMap any more, we can't
+ // reliably indicate transient vs. stuck, so always say stuck so
+ // that the client doesn't block.
+ return STUCK_UNAVAILABLE;
}
}
utime_t laggy_since;
mds_rank_t standby_for_rank;
std::string standby_for_name;
+ fs_cluster_id_t standby_for_ns;
std::set<mds_rank_t> export_targets;
uint64_t mds_features;
mds_info_t() : global_id(MDS_GID_NONE), rank(MDS_RANK_NONE), inc(0), state(STATE_STANDBY), state_seq(0),
- standby_for_rank(MDS_NO_STANDBY_PREF) { }
+ standby_for_rank(MDS_NO_STANDBY_PREF),
+ standby_for_ns(FS_CLUSTER_ID_NONE)
+ { }
bool laggy() const { return !(laggy_since == utime_t()); }
void clear_laggy() { laggy_since = utime_t(); }
CompatSet compat;
friend class MDSMonitor;
+ friend class Filesystem;
+ friend class FSMap;
public:
MDSMap()
return utime_t(session_timeout,0);
}
uint64_t get_max_filesize() { return max_file_size; }
+ void set_max_filesize(uint64_t m) { max_file_size = m; }
int get_flags() const { return flags; }
int test_flag(int f) const { return flags & f; }
void set_flag(int f) { flags |= f; }
void clear_flag(int f) { flags &= ~f; }
+ const std::string &get_fs_name() const {return fs_name;}
+
void set_snaps_allowed() {
set_flag(CEPH_MDSMAP_ALLOW_SNAPS);
ever_allowed_snaps = true;
const std::set<int64_t> &get_data_pools() const { return data_pools; }
int64_t get_first_data_pool() const { return *data_pools.begin(); }
- int64_t get_cas_pool() const { return cas_pool; }
int64_t get_metadata_pool() const { return metadata_pool; }
bool is_data_pool(int64_t poolid) const {
return data_pools.count(poolid);
return get_enabled() && (is_data_pool(poolid) || metadata_pool == poolid);
}
- const std::map<mds_gid_t,mds_info_t>& get_mds_info() { return mds_info; }
- const mds_info_t& get_mds_info_gid(mds_gid_t gid) {
- assert(mds_info.count(gid));
- return mds_info[gid];
+ const std::map<mds_gid_t,mds_info_t>& get_mds_info() const { return mds_info; }
+ const mds_info_t& get_mds_info_gid(mds_gid_t gid) const {
+ return mds_info.at(gid);
}
- const mds_info_t& get_mds_info(mds_rank_t m) {
- assert(up.count(m) && mds_info.count(up[m]));
- return mds_info[up[m]];
+ const mds_info_t& get_mds_info(mds_rank_t m) const {
+ assert(up.count(m) && mds_info.count(up.at(m)));
+ return mds_info.at(up.at(m));
}
- mds_gid_t find_mds_gid_by_name(const std::string& s) {
+ mds_gid_t find_mds_gid_by_name(const std::string& s) const {
for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin();
p != mds_info.end();
++p) {
}
// counts
- unsigned get_num_in_mds() {
+ unsigned get_num_in_mds() const {
return in.size();
}
- unsigned get_num_up_mds() {
+ unsigned get_num_up_mds() const {
return up.size();
}
- int get_num_failed_mds() {
+ int get_num_failed_mds() const {
return failed.size();
}
unsigned get_num_mds(int state) const {
}
// sets
- void get_mds_set(std::set<mds_rank_t>& s) {
+ void get_mds_set(std::set<mds_rank_t>& s) const {
s = in;
}
- void get_up_mds_set(std::set<mds_rank_t>& s) {
+ void get_up_mds_set(std::set<mds_rank_t>& s) const {
for (std::map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin();
p != up.end();
++p)
s.insert(p->first);
}
- void get_active_mds_set(std::set<mds_rank_t>& s) {
+ void get_active_mds_set(std::set<mds_rank_t>& s) const {
get_mds_set(s, MDSMap::STATE_ACTIVE);
}
- void get_failed_mds_set(std::set<mds_rank_t>& s) {
+ void get_failed_mds_set(std::set<mds_rank_t>& s) const {
s = failed;
}
if (p->second.state >= STATE_CLIENTREPLAY && p->second.state <= STATE_STOPPING)
s.insert(p->second.rank);
}
- void get_mds_set(std::set<mds_rank_t>& s, DaemonState state) {
+ void get_mds_set(std::set<mds_rank_t>& s, DaemonState state) const {
for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin();
p != mds_info.end();
++p)
s.insert(p->second.rank);
}
- int get_random_up_mds() {
- if (up.empty())
- return -1;
- std::map<mds_rank_t, mds_gid_t>::iterator p = up.begin();
- for (int n = rand() % up.size(); n; n--)
- ++p;
- return p->first;
- }
-
- const mds_info_t* find_by_name(const std::string& name) const {
- for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin();
- p != mds_info.end();
- ++p) {
- if (p->second.name == name)
- return &p->second;
- }
- return NULL;
- }
-
- mds_gid_t find_standby_for(mds_rank_t mds, std::string& name) const {
- std::map<mds_gid_t, mds_info_t>::const_iterator generic_standby
- = mds_info.end();
- for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin();
- p != mds_info.end();
- ++p) {
- if ((p->second.state != MDSMap::STATE_STANDBY && p->second.state != MDSMap::STATE_STANDBY_REPLAY) ||
- p->second.laggy() ||
- p->second.rank >= 0)
- continue;
- if (p->second.standby_for_rank == mds || (name.length() && p->second.standby_for_name == name))
- return p->first;
- if (p->second.standby_for_rank < 0 && p->second.standby_for_name.length() == 0)
- generic_standby = p;
- }
- if (generic_standby != mds_info.end())
- return generic_standby->first;
- return MDS_GID_NONE;
- }
-
- mds_gid_t find_unused_for(mds_rank_t mds, std::string& name,
- bool force_standby_active) const {
- for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin();
- p != mds_info.end();
- ++p) {
- if (p->second.state != MDSMap::STATE_STANDBY ||
- p->second.laggy() ||
- p->second.rank >= 0)
- continue;
- if ((p->second.standby_for_rank == MDS_NO_STANDBY_PREF ||
- p->second.standby_for_rank == MDS_MATCHED_ACTIVE ||
- (p->second.standby_for_rank == MDS_STANDBY_ANY && force_standby_active))) {
- return p->first;
- }
- }
- return MDS_GID_NONE;
- }
-
- mds_gid_t find_replacement_for(mds_rank_t mds, std::string& name,
- bool force_standby_active) const {
- const mds_gid_t standby = find_standby_for(mds, name);
- if (standby)
- return standby;
- else
- return find_unused_for(mds, name, force_standby_active);
- }
-
void get_health(list<pair<health_status_t,std::string> >& summary,
list<pair<health_status_t,std::string> > *detail) const;
return i->second.state;
}
- mds_info_t& get_info(mds_rank_t m) { assert(up.count(m)); return mds_info[up[m]]; }
- mds_info_t& get_info_gid(mds_gid_t gid) { assert(mds_info.count(gid)); return mds_info[gid]; }
+ const mds_info_t& get_info(const mds_rank_t m) {
+ return mds_info.at(up.at(m));
+ }
+ const mds_info_t& get_info_gid(const mds_gid_t gid) {
+ return mds_info.at(gid);
+ }
bool is_boot(mds_rank_t m) const { return get_state(m) == STATE_BOOT; }
bool is_creating(mds_rank_t m) const { return get_state(m) == STATE_CREATING; }
return p->second.laggy();
}
-
- // cluster states
- bool is_full() const {
- return mds_rank_t(in.size()) >= max_mds;
- }
- bool is_degraded() const { // degraded = some recovery in process. fixes active membership and recovery_set.
+ // degraded = some recovery in process. fixes active membership and
+ // recovery_set.
+ bool is_degraded() const {
if (!failed.empty() || !damaged.empty())
return true;
for (std::map<mds_gid_t,mds_info_t>::const_iterator p = mds_info.begin();
return false;
}
- mds_rank_t get_rank_gid(mds_gid_t gid) {
- if (mds_info.count(gid))
- return mds_info[gid].rank;
- return MDS_RANK_NONE;
+ mds_rank_t get_rank_gid(mds_gid_t gid) const {
+ if (mds_info.count(gid)) {
+ return mds_info.at(gid).rank;
+ } else {
+ return MDS_RANK_NONE;
+ }
}
- int get_inc(mds_rank_t m) {
- if (up.count(m))
- return mds_info[up[m]].inc;
- return 0;
- }
int get_inc_gid(mds_gid_t gid) {
if (mds_info.count(gid))
return mds_info[gid].inc;
}
- void print(ostream& out);
- void print_summary(Formatter *f, ostream *out);
+ void print(ostream& out) const;
+ void print_summary(Formatter *f, ostream *out) const;
void dump(Formatter *f) const;
static void generate_test_instances(list<MDSMap*>& ls);
mds/MDSContext.h \
mds/MDSAuthCaps.h \
mds/MDSMap.h \
+ mds/FSMap.h \
mds/MDSTable.h \
mds/MDSTableServer.h \
mds/MDSTableClient.h \