Dispatcher(cct),
beacon_interval(g_conf()->mds_beacon_interval),
monc(monc),
- name(name)
+ name(name),
+ compat(MDSMap::get_compat_set_all())
{
}
if (mdsmap.get_epoch() >= epoch) {
epoch = mdsmap.get_epoch();
- compat = MDSMap::get_compat_set_default();
- compat.merge(mdsmap.compat);
}
}
f->dump_int("default_fscid", legacy_client_fscid);
f->open_object_section("compat");
- compat.dump(f);
+ default_compat.dump(f);
f->close_section();
f->open_object_section("feature_flags");
epoch = rhs.epoch;
next_filesystem_id = rhs.next_filesystem_id;
legacy_client_fscid = rhs.legacy_client_fscid;
- compat = rhs.compat;
+ default_compat = rhs.default_compat;
enable_multiple = rhs.enable_multiple;
mds_roles = rhs.mds_roles;
standby_daemons = rhs.standby_daemons;
out << "e" << epoch << std::endl;
out << "enable_multiple, ever_enabled_multiple: " << enable_multiple << ","
<< ever_enabled_multiple << std::endl;
- out << "compat: " << compat << std::endl;
+ out << "default compat: " << default_compat << std::endl;
out << "legacy client fscid: " << legacy_client_fscid << std::endl;
out << " " << std::endl;
fs->mds_map.data_pools.push_back(data_pool);
fs->mds_map.metadata_pool = metadata_pool;
fs->mds_map.cas_pool = -1;
- fs->mds_map.compat = compat;
+ fs->mds_map.compat = default_compat;
fs->mds_map.created = ceph_clock_now();
fs->mds_map.modified = ceph_clock_now();
fs->mds_map.enabled = true;
new_fs->mds_map.metadata_pool = fs->mds_map.metadata_pool;
new_fs->mds_map.cas_pool = fs->mds_map.cas_pool;
new_fs->mds_map.fs_name = fs->mds_map.fs_name;
- new_fs->mds_map.compat = compat;
+ new_fs->mds_map.compat = default_compat;
new_fs->mds_map.created = ceph_clock_now();
new_fs->mds_map.modified = ceph_clock_now();
new_fs->mds_map.standby_count_wanted = fs->mds_map.standby_count_wanted;
}
}
-void FSMap::update_compat(const CompatSet &c)
-{
- // We could do something more complicated here to enable
- // different filesystems to be served by different MDS versions,
- // but this is a lot simpler because it doesn't require us to
- // track the compat versions for standby daemons.
- compat = c;
- for (const auto &i : filesystems) {
- MDSMap &mds_map = i.second->mds_map;
- mds_map.compat = c;
- mds_map.epoch = epoch;
- }
-}
-
void FSMap::encode(bufferlist& bl, uint64_t features) const
{
ENCODE_START(STRUCT_VERSION, 6, bl);
encode(epoch, bl);
encode(next_filesystem_id, bl);
encode(legacy_client_fscid, bl);
- encode(compat, bl);
+ encode(default_compat, bl);
encode(enable_multiple, bl);
{
std::vector<Filesystem::ref> v;
decode(epoch, p);
decode(next_filesystem_id, p);
decode(legacy_client_fscid, p);
- decode(compat, p);
+ decode(default_compat, p);
decode(enable_multiple, p);
{
std::vector<Filesystem::ref> v;
return result;
}
-const MDSMap::mds_info_t* FSMap::get_available_standby(fs_cluster_id_t fscid) const
+const MDSMap::mds_info_t* FSMap::get_available_standby(const Filesystem& fs) const
{
+ const bool upgradeable = fs.is_upgradeable();
const mds_info_t* who = nullptr;
for (const auto& [gid, info] : standby_daemons) {
ceph_assert(info.rank == MDS_RANK_NONE);
if (info.laggy() || info.is_frozen()) {
continue;
+ } else if (!info.compat.writeable(fs.mds_map.compat)) {
+ /* standby is not compatible with this fs */
+ continue;
+ } else if (!upgradeable && !fs.mds_map.compat.writeable(info.compat)) {
+ /* promotion would change fs.mds_map.compat and we're not upgradeable */
+ continue;
}
- if (info.join_fscid == fscid) {
+ if (info.join_fscid == fs.fscid) {
who = &info;
break;
} else if (info.join_fscid == FS_CLUSTER_ID_NONE) {
/* the standby-replay is frozen, do nothing! */
return nullptr;
} else {
+ ceph_assert(info.compat.writeable(fs->mds_map.compat));
return &info;
}
}
}
- return get_available_standby(role.fscid);
+ return get_available_standby(*fs);
}
void FSMap::sanity() const
ceph_assert(filesystems.count(legacy_client_fscid) == 1);
}
- for (const auto &i : filesystems) {
- auto fs = i.second;
- ceph_assert(fs->mds_map.compat.compare(compat) == 0);
- ceph_assert(fs->fscid == i.first);
- for (const auto &j : fs->mds_map.mds_info) {
- ceph_assert(j.second.rank != MDS_RANK_NONE);
- ceph_assert(mds_roles.count(j.first) == 1);
- ceph_assert(standby_daemons.count(j.first) == 0);
- ceph_assert(standby_epochs.count(j.first) == 0);
- ceph_assert(mds_roles.at(j.first) == i.first);
- if (j.second.state != MDSMap::STATE_STANDBY_REPLAY) {
- ceph_assert(fs->mds_map.up.at(j.second.rank) == j.first);
- ceph_assert(fs->mds_map.failed.count(j.second.rank) == 0);
- ceph_assert(fs->mds_map.damaged.count(j.second.rank) == 0);
+ for (const auto& [fscid, fs] : filesystems) {
+ ceph_assert(fscid == fs->fscid);
+ for (const auto& [gid, info] : fs->mds_map.mds_info) {
+ ceph_assert(info.rank != MDS_RANK_NONE);
+ ceph_assert(mds_roles.at(gid) == fscid);
+ ceph_assert(standby_daemons.count(gid) == 0);
+ ceph_assert(standby_epochs.count(gid) == 0);
+ if (info.state != MDSMap::STATE_STANDBY_REPLAY) {
+ ceph_assert(fs->mds_map.up.at(info.rank) == gid);
+ ceph_assert(fs->mds_map.failed.count(info.rank) == 0);
+ ceph_assert(fs->mds_map.damaged.count(info.rank) == 0);
+ } else {
+ ceph_assert(fs->mds_map.allows_standby_replay());
}
+ ceph_assert(info.compat.writeable(fs->mds_map.compat));
}
for (const auto &j : fs->mds_map.up) {
standby_epochs.erase(standby_gid);
}
+ if (!filesystem.mds_map.compat.writeable(info.compat)) {
+ ceph_assert(filesystem.is_upgradeable());
+ filesystem.mds_map.compat.merge(info.compat);
+ }
+
// Indicate that Filesystem has been modified
mds_map.epoch = epoch;
}
void dump(ceph::Formatter *f) const;
void print(std::ostream& out) const;
+ bool is_upgradeable() const { // true only when standby-replay is not allowed and get_num_in_mds() is at most 1
+ return !mds_map.allows_standby_replay() && mds_map.get_num_in_mds() <= 1;
+ } // elsewhere in this change this must hold before a daemon's compat set is merged into mds_map.compat
+
/**
* Return true if a daemon is already assigned as
* STANDBY_REPLAY for the gid `who`
static const version_t STRUCT_VERSION = 7;
static const version_t STRUCT_VERSION_TRIM_TO = 7;
- FSMap() : compat(MDSMap::get_compat_set_default()) {}
+ FSMap() : default_compat(MDSMap::get_compat_set_default()) {}
FSMap(const FSMap &rhs)
:
epoch(rhs.epoch),
next_filesystem_id(rhs.next_filesystem_id),
legacy_client_fscid(rhs.legacy_client_fscid),
- compat(rhs.compat),
+ default_compat(rhs.default_compat),
enable_multiple(rhs.enable_multiple),
ever_enabled_multiple(rhs.ever_enabled_multiple),
mds_roles(rhs.mds_roles),
FSMap &operator=(const FSMap &rhs);
- const CompatSet &get_compat() const {return compat;}
+ const CompatSet &get_default_compat() const {return default_compat;} // read-only view of default_compat, the set this change assigns to mds_map.compat when it initializes a file system's MDSMap
void filter(const std::vector<string>& allowed)
{
*/
std::map<mds_gid_t, mds_info_t> get_mds_info() const;
- const mds_info_t* get_available_standby(fs_cluster_id_t fscid) const;
+ const mds_info_t* get_available_standby(const Filesystem& fs) const;
/**
* Resolve daemon name to GID
return filesystems.at(mds_roles.at(who))->get_standby_replay(who);
}
- /**
- * A daemon has told us it's compat, and it's too new
- * for the one we had previously. Impose the new one
- * on all filesystems.
- */
- void update_compat(const CompatSet &c);
-
Filesystem::const_ref get_legacy_filesystem()
{
if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
epoch_t epoch = 0;
uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1;
fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE;
- CompatSet compat;
+ CompatSet default_compat;
bool enable_multiple = true;
bool ever_enabled_multiple = true; // < the cluster had multiple FS enabled once
f->close_section();
f->dump_unsigned("features", mds_features);
f->dump_unsigned("flags", flags);
+ f->dump_object("compat", compat);
}
void MDSMap::mds_info_t::dump(std::ostream& o) const
if (join_fscid != FS_CLUSTER_ID_NONE) {
o << " join_fscid=" << join_fscid;
}
- o << " addr " << addrs << "]";
+ o << " addr " << addrs;
+ o << " compat ";
+ compat.printlite(o);
+ o << "]";
}
void MDSMap::mds_info_t::generate_test_instances(std::list<mds_info_t*>& ls)
void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const
{
- __u8 v = 9;
+ __u8 v = 10;
if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
v = 7;
}
if (v >= 9) {
encode(flags, bl);
}
+ if (v >= 10) {
+ encode(compat, bl);
+ }
ENCODE_FINISH(bl);
}
void MDSMap::mds_info_t::decode(bufferlist::const_iterator& bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(9, 4, 4, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(10, 4, 4, bl);
decode(global_id, bl);
decode(name, bl);
decode(rank, bl);
if (struct_v >= 9) {
decode(flags, bl);
}
+ if (struct_v >= 10) {
+ decode(compat, bl);
+ }
DECODE_FINISH(bl);
}
}
}
+ for (auto& p: mds_info) {
+ static const CompatSet empty;
+ auto& info = p.second;
+ if (empty.compare(info.compat) == 0) {
+ /* bootstrap old compat; mds_info_t::decode does not have access to MDSMap */
+ info.compat = compat;
+ }
+ }
+
DECODE_FINISH(p);
}
} availability_t;
struct mds_info_t {
+ enum mds_flags : uint64_t { // bit flags; presumably the values stored in the uint64_t `flags` member -- TODO confirm
+ FROZEN = 1 << 0, // bit 0
+ };
+
mds_info_t() = default;
bool laggy() const { return !(laggy_since == utime_t()); }
fs_cluster_id_t join_fscid = FS_CLUSTER_ID_NONE;
uint64_t mds_features = 0;
uint64_t flags = 0;
- enum mds_flags : uint64_t {
- FROZEN = 1 << 0,
- };
+ CompatSet compat;
private:
void encode_versioned(ceph::buffer::list& bl, uint64_t features) const;
void encode_unversioned(ceph::buffer::list& bl) const;
}
};
+class CompatSetHandler : public FileSystemCommandHandler /*
+ * Handler for the "fs compat" command: adds or removes one compat or
+ * incompat feature in a single file system's MDSMap CompatSet.
+ */
+{
+ public:
+ CompatSetHandler()
+ : FileSystemCommandHandler("fs compat")
+ {
+ }
+ /**
+ * Validate the command arguments, apply the requested change to a copy of
+ * the file system's CompatSet, and install that copy through
+ * FSMap::modify_filesystem().
+ *
+ * A status or error message is written to `ss`. `mon` and `op` are not
+ * used by this handler.
+ *
+ * @return 0 on success (including requests that change nothing),
+ * -EINVAL for a missing or invalid argument, -ENOENT if the file
+ * system does not exist, -EBUSY if any MDS is up, -EEXIST if the
+ * feature id is already present under a different name.
+ */
+ int handle(
+ Monitor *mon,
+ FSMap &fsmap,
+ MonOpRequestRef op,
+ const cmdmap_t& cmdmap,
+ std::ostream &ss) override
+ {
+ static const std::set<std::string> subops = {"rm_incompat", "rm_compat", "add_incompat", "add_compat"}; // accepted "subop" values
+
+ std::string fs_name;
+ if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
+ ss << "Missing filesystem name";
+ return -EINVAL;
+ }
+ auto fs = fsmap.get_filesystem(fs_name);
+ if (fs == nullptr) {
+ ss << "Not found: '" << fs_name << "'";
+ return -ENOENT;
+ }
+
+ string subop;
+ if (!cmd_getval(cmdmap, "subop", subop) || subops.count(subop) == 0) {
+ ss << "subop `" << subop << "' not recognized. Must be one of: " << subops;
+ return -EINVAL;
+ }
+
+ int64_t feature;
+ if (!cmd_getval(cmdmap, "feature", feature) || feature <= 0) { // feature ids must be positive
+ ss << "Invalid feature";
+ return -EINVAL;
+ }
+
+ if (fs->mds_map.get_num_up_mds() > 0) { // only allowed while no MDS is up in this file system
+ ss << "file system must be failed or down; use `ceph fs fail` to bring down";
+ return -EBUSY;
+ }
+
+ CompatSet cs = fs->mds_map.compat; // edit a copy; the map itself is only changed via modify_filesystem() below
+ if (subop == "rm_compat") {
+ if (cs.compat.contains(feature)) {
+ ss << "removed compat feature " << feature;
+ cs.compat.remove(feature);
+ } else {
+ ss << "already removed compat feature " << feature; // no-op, still reported as success
+ }
+ } else if (subop == "rm_incompat") {
+ if (cs.incompat.contains(feature)) {
+ ss << "removed incompat feature " << feature;
+ cs.incompat.remove(feature);
+ } else {
+ ss << "already removed incompat feature " << feature; // no-op, still reported as success
+ }
+ } else if (subop == "add_compat" || subop == "add_incompat") {
+ string feature_str;
+ if (!cmd_getval(cmdmap, "feature_str", feature_str) || feature_str.empty()) { // a name is mandatory when adding
+ ss << "adding a feature requires a feature string";
+ return -EINVAL;
+ }
+ auto f = CompatSet::Feature(feature, feature_str);
+ if (subop == "add_compat") {
+ if (cs.compat.contains(feature)) {
+ auto name = cs.compat.get_name(feature);
+ if (name == feature_str) {
+ ss << "feature already exists"; // same id and name: nothing to change
+ } else {
+ ss << "feature with differing name `" << name << "' exists";
+ return -EEXIST;
+ }
+ } else {
+ cs.compat.insert(f);
+ ss << "added compat feature " << f;
+ }
+ } else if (subop == "add_incompat") {
+ if (cs.incompat.contains(feature)) {
+ auto name = cs.incompat.get_name(feature);
+ if (name == feature_str) {
+ ss << "feature already exists"; // same id and name: nothing to change
+ } else {
+ ss << "feature with differing name `" << name << "' exists";
+ return -EEXIST;
+ }
+ } else {
+ cs.incompat.insert(f);
+ ss << "added incompat feature " << f;
+ }
+ } else ceph_assert(0); // unreachable: the enclosing branch only admits add_compat/add_incompat
+ } else ceph_assert(0); // unreachable: subop was checked against subops above
+
+ auto modifyf = [cs = std::move(cs)](auto&& fs) { // replaces the file system's compat set with the edited copy
+ fs->mds_map.compat = cs;
+ };
+
+ fsmap.modify_filesystem(fs->fscid, std::move(modifyf)); // runs even when the request changed nothing
+ return 0;
+ }
+};
+
class RequiredClientFeaturesHandler : public FileSystemCommandHandler
{
public:
handlers.push_back(std::make_shared<SetHandler>());
handlers.push_back(std::make_shared<FailHandler>());
handlers.push_back(std::make_shared<FlagSetHandler>());
+ handlers.push_back(std::make_shared<CompatSetHandler>());
handlers.push_back(std::make_shared<RequiredClientFeaturesHandler>());
handlers.push_back(std::make_shared<AddDataPoolHandler>(paxos));
handlers.push_back(std::make_shared<RemoveDataPoolHandler>());
goto ignore;
}
- // check compat
- if (!m->get_compat().writeable(fsmap.compat)) {
- dout(1) << " mds " << m->get_orig_source()
- << " " << m->get_orig_source_addrs()
- << " can't write to fsmap " << fsmap.compat << dendl;
- goto ignore;
- }
-
// fw to leader?
if (!is_leader())
return false;
// Store health
pending_daemon_health[gid] = m->get_health();
- // boot?
+ const auto& cs = m->get_compat();
if (state == MDSMap::STATE_BOOT) {
// zap previous instance of this name?
if (g_conf()->mds_enforce_unique_name) {
mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return false;
}
- const MDSMap::mds_info_t &existing_info =
- pending.get_info_gid(existing);
+ const auto& existing_info = pending.get_info_gid(existing);
mon.clog->info() << existing_info.human_name() << " restarted";
fail_mds_gid(pending, existing);
failed_mds = true;
new_info.mds_features = m->get_mds_features();
new_info.state = MDSMap::STATE_STANDBY;
new_info.state_seq = seq;
+ new_info.compat = cs;
if (m->get_fs().size()) {
fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE;
auto f = pending.get_filesystem(m->get_fs());
beacon.stamp = mono_clock::now();
beacon.seq = seq;
- // new incompat?
- if (!pending.compat.writeable(m->get_compat())) {
- dout(10) << " fsmap " << pending.compat
- << " can't write to new mds' " << m->get_compat()
- << ", updating fsmap and killing old mds's"
- << dendl;
- pending.update_compat(m->get_compat());
- }
-
update_metadata(m->get_global_id(), m->get_sys_info());
} else {
// state update
}
const auto& info = pending.get_info_gid(gid);
+
+ // did the reported compat change? That's illegal!
+ if (cs.compare(info.compat) != 0) {
+ if (!mon.osdmon()->is_writeable()) {
+ mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
+ return false;
+ }
+ mon.clog->warn() << info.human_name() << " compat changed unexpectedly";
+ fail_mds_gid(pending, gid);
+ request_proposal(mon.osdmon());
+ return true;
+ }
+
if (info.state == MDSMap::STATE_STOPPING &&
state != MDSMap::STATE_STOPPING &&
state != MDSMap::STATE_STOPPED) {
count_metadata(field, f.get());
f->flush(ds);
r = 0;
+ } else if (prefix == "fs compat show") {
+ string fs_name;
+ cmd_getval(cmdmap, "fs_name", fs_name);
+ const auto &fs = fsmap.get_filesystem(fs_name);
+ if (fs == nullptr) {
+ ss << "filesystem '" << fs_name << "' not found";
+ r = -ENOENT;
+ goto out;
+ }
+
+ if (f) {
+ f->open_object_section("mds_compat");
+ fs->mds_map.compat.dump(f.get());
+ f->close_section();
+ f->flush(ds);
+ } else {
+ ds << fs->mds_map.compat;
+ }
+ r = 0;
} else if (prefix == "mds compat show") {
if (f) {
f->open_object_section("mds_compat");
- fsmap.compat.dump(f.get());
+ fsmap.default_compat.dump(f.get());
f->close_section();
f->flush(ds);
} else {
- ds << fsmap.compat;
+ ds << fsmap.default_compat;
}
r = 0;
} else if (prefix == "fs get") {
ss << "removed failed mds." << role;
return 0;
+ /* TODO: convert to fs commands to update defaults */
} else if (prefix == "mds compat rm_compat") {
int64_t f;
if (!cmd_getval(cmdmap, "feature", f)) {
<< cmd_vartype_stringify(cmdmap.at("feature")) << "'";
return -EINVAL;
}
- if (fsmap.compat.compat.contains(f)) {
+ if (fsmap.default_compat.compat.contains(f)) {
ss << "removing compat feature " << f;
- CompatSet modified = fsmap.compat;
- modified.compat.remove(f);
- fsmap.update_compat(modified);
+ fsmap.default_compat.compat.remove(f);
} else {
- ss << "compat feature " << f << " not present in " << fsmap.compat;
+ ss << "compat feature " << f << " not present in " << fsmap.default_compat;
}
r = 0;
} else if (prefix == "mds compat rm_incompat") {
<< cmd_vartype_stringify(cmdmap.at("feature")) << "'";
return -EINVAL;
}
- if (fsmap.compat.incompat.contains(f)) {
+ if (fsmap.default_compat.incompat.contains(f)) {
ss << "removing incompat feature " << f;
- CompatSet modified = fsmap.compat;
- modified.incompat.remove(f);
- fsmap.update_compat(modified);
+ fsmap.default_compat.incompat.remove(f);
} else {
- ss << "incompat feature " << f << " not present in " << fsmap.compat;
+ ss << "incompat feature " << f << " not present in " << fsmap.default_compat;
}
r = 0;
} else if (prefix == "mds repaired") {
const auto state = info.state;
const mds_info_t* rep_info = nullptr;
if (state == MDSMap::STATE_STANDBY_REPLAY) {
- rep_info = fsmap.get_available_standby(fscid);
+ rep_info = fsmap.get_available_standby(*fs);
} else if (state == MDSMap::STATE_ACTIVE) {
rep_info = fsmap.find_replacement_for({fscid, rank});
} else {
// as standby-replay daemons. Don't do this when the cluster is degraded
// as a standby-replay daemon may try to read a journal being migrated.
for (;;) {
- auto info = fsmap.get_available_standby(fs.fscid);
+ auto info = fsmap.get_available_standby(fs);
if (!info) break;
dout(20) << "standby available mds." << info->global_id << dendl;
bool changed = false;
"name=who,type=CephString "
"name=args,type=CephString,n=N",
"send command to particular mds", "mds", "rw", FLAG(OBSOLETE))
-COMMAND("mds compat show", "show mds compatibility settings",
- "mds", "r")
COMMAND_WITH_FLAG("mds stop name=role,type=CephString", "stop mds",
"mds", "rw", FLAG(OBSOLETE))
COMMAND_WITH_FLAG("mds deactivate name=role,type=CephString",
"remove failed rank", "mds", "rw", FLAG(HIDDEN))
COMMAND_WITH_FLAG("mds cluster_down", "take MDS cluster down", "mds", "rw", FLAG(OBSOLETE))
COMMAND_WITH_FLAG("mds cluster_up", "bring MDS cluster up", "mds", "rw", FLAG(OBSOLETE))
-COMMAND("mds compat rm_compat "
+COMMAND_WITH_FLAG("mds compat show", "show mds compatibility settings",
+ "mds", "r", FLAG(DEPRECATED))
+COMMAND("fs compat show "
+ "name=fs_name,type=CephString ",
+ "show fs compatibility settings",
+ "mds", "r") // read-only; the "fs compat show" branch prints the named file system's mds_map.compat
+COMMAND_WITH_FLAG("mds compat rm_compat "
"name=feature,type=CephInt,range=0",
- "remove compatible feature", "mds", "rw")
-COMMAND("mds compat rm_incompat "
+ "remove compatible feature", "mds", "rw", FLAG(DEPRECATED))
+COMMAND_WITH_FLAG("mds compat rm_incompat "
"name=feature,type=CephInt,range=0",
- "remove incompatible feature", "mds", "rw")
+ "remove incompatible feature", "mds", "rw", FLAG(DEPRECATED))
COMMAND_WITH_FLAG("mds add_data_pool "
"name=pool,type=CephString",
"add data pool <pool>", "mds", "rw", FLAG(OBSOLETE))
"list available cephfs features to be set/unset",
"mds", "r")
+COMMAND("fs compat "
+ "name=fs_name,type=CephString "
+ "name=subop,type=CephChoices,strings=rm_compat|rm_incompat|add_compat|add_incompat "
+ "name=feature,type=CephInt " // CompatSetHandler rejects values <= 0
+ "name=feature_str,type=CephString,req=false ", // optional in the grammar, but CompatSetHandler requires it for add_compat/add_incompat
+ "manipulate compat settings", "fs", "rw")
+
COMMAND("fs required_client_features "
"name=fs_name,type=CephString "
"name=subop,type=CephChoices,strings=add|rm "