PaxosService.cc
OSDMonitor.cc
MDSMonitor.cc
+ FSCommands.cc
MgrMonitor.cc
MonmapMonitor.cc
LogMonitor.cc
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 Red Hat Ltd
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#include "OSDMonitor.h"
+#include "PGMonitor.h"
+
+#include "FSCommands.h"
+
+
+
+static const string EXPERIMENTAL_WARNING("Warning! This feature is experimental."
+"It may cause problems up to and including data loss."
+"Consult the documentation at ceph.com, and if unsure, do not proceed."
+"Add --yes-i-really-mean-it if you are certain.");
+
+
+
+class FlagSetHandler : public FileSystemCommandHandler
+{
+ public:
+ FlagSetHandler()
+ : FileSystemCommandHandler("fs flag set")
+ {
+ }
+
+ int handle(
+ Monitor *mon,
+ FSMap &fsmap,
+ MonOpRequestRef op,
+ map<string, cmd_vartype> &cmdmap,
+ std::stringstream &ss) override
+ {
+ string flag_name;
+ cmd_getval(g_ceph_context, cmdmap, "flag_name", flag_name);
+
+ string flag_val;
+ cmd_getval(g_ceph_context, cmdmap, "val", flag_val);
+
+ string confirm;
+ cmd_getval(g_ceph_context, cmdmap, "confirm", confirm);
+
+ if (flag_name == "enable_multiple") {
+ bool flag_bool = false;
+ int r = parse_bool(flag_val, &flag_bool, ss);
+ if (r != 0) {
+ ss << "Invalid boolean value '" << flag_val << "'";
+ return r;
+ }
+
+ bool jewel = mon->get_quorum_con_features() & CEPH_FEATURE_SERVER_JEWEL;
+ if (flag_bool && !jewel) {
+ ss << "Multiple-filesystems are forbidden until all mons are updated";
+ return -EINVAL;
+ }
+ if (confirm != "--yes-i-really-mean-it") {
+ ss << EXPERIMENTAL_WARNING;
+ }
+ fsmap.set_enable_multiple(flag_bool);
+ return 0;
+ } else {
+ ss << "Unknown flag '" << flag_name << "'";
+ return -EINVAL;
+ }
+ }
+};
+
+class FsNewHandler : public FileSystemCommandHandler
+{
+ public:
+ FsNewHandler()
+ : FileSystemCommandHandler("fs new")
+ {
+ }
+
+ int handle(
+ Monitor *mon,
+ FSMap &fsmap,
+ MonOpRequestRef op,
+ map<string, cmd_vartype> &cmdmap,
+ std::stringstream &ss)
+ {
+ string metadata_name;
+ cmd_getval(g_ceph_context, cmdmap, "metadata", metadata_name);
+ int64_t metadata = mon->osdmon()->osdmap.lookup_pg_pool_name(metadata_name);
+ if (metadata < 0) {
+ ss << "pool '" << metadata_name << "' does not exist";
+ return -ENOENT;
+ }
+
+ string force;
+ cmd_getval(g_ceph_context,cmdmap, "force", force);
+ int64_t metadata_num_objects = mon->pgmon()->pg_map.pg_pool_sum[metadata].stats.sum.num_objects;
+ if (force != "--force" && metadata_num_objects > 0) {
+ ss << "pool '" << metadata_name
+ << "' already contains some objects. Use an empty pool instead.";
+ return -EINVAL;
+ }
+
+ string data_name;
+ cmd_getval(g_ceph_context, cmdmap, "data", data_name);
+ int64_t data = mon->osdmon()->osdmap.lookup_pg_pool_name(data_name);
+ if (data < 0) {
+ ss << "pool '" << data_name << "' does not exist";
+ return -ENOENT;
+ }
+ if (data == 0) {
+ ss << "pool '" << data_name << "' has id 0, which CephFS does not allow. Use another pool or recreate it to get a non-zero pool id.";
+ return -EINVAL;
+ }
+
+ string fs_name;
+ cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
+ if (fs_name.empty()) {
+ // Ensure fs name is not empty so that we can implement
+ // commmands that refer to FS by name in future.
+ ss << "Filesystem name may not be empty";
+ return -EINVAL;
+ }
+
+ if (fsmap.get_filesystem(fs_name)) {
+ auto fs = fsmap.get_filesystem(fs_name);
+ if (*(fs->mds_map.get_data_pools().begin()) == data
+ && fs->mds_map.get_metadata_pool() == metadata) {
+ // Identical FS created already, this is a no-op
+ ss << "filesystem '" << fs_name << "' already exists";
+ return 0;
+ } else {
+ ss << "filesystem already exists with name '" << fs_name << "'";
+ return -EINVAL;
+ }
+ }
+
+ if (fsmap.filesystem_count() > 0
+ && !fsmap.get_enable_multiple()) {
+ ss << "Creation of multiple filesystems is disabled. To enable "
+ "this experimental feature, use 'ceph fs flag set enable_multiple "
+ "true'";
+ return -EINVAL;
+ }
+
+ pg_pool_t const *data_pool = mon->osdmon()->osdmap.get_pg_pool(data);
+ assert(data_pool != NULL); // Checked it existed above
+ pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata);
+ assert(metadata_pool != NULL); // Checked it existed above
+
+ // we must make these checks before we even allow ourselves to *think*
+ // about requesting a proposal to the osdmonitor and bail out now if
+ // we believe we must. bailing out *after* we request the proposal is
+ // bad business as we could have changed the osdmon's state and ending up
+ // returning an error to the user.
+ int r = _check_pool(mon->osdmon()->osdmap, data, false, &ss);
+ if (r < 0) {
+ return r;
+ }
+
+ r = _check_pool(mon->osdmon()->osdmap, metadata, true, &ss);
+ if (r < 0) {
+ return r;
+ }
+
+ // All checks passed, go ahead and create.
+ fsmap.create_filesystem(fs_name, metadata, data,
+ mon->get_quorum_con_features());
+ ss << "new fs with metadata pool " << metadata << " and data pool " << data;
+ return 0;
+ }
+};
+
+class SetHandler : public FileSystemCommandHandler
+{
+public:
+ SetHandler()
+ : FileSystemCommandHandler("fs set")
+ {}
+
+ int handle(
+ Monitor *mon,
+ FSMap &fsmap,
+ MonOpRequestRef op,
+ map<string, cmd_vartype> &cmdmap,
+ std::stringstream &ss) override
+ {
+ std::string fs_name;
+ if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name) || fs_name.empty()) {
+ ss << "Missing filesystem name";
+ return -EINVAL;
+ }
+
+ auto fs = fsmap.get_filesystem(fs_name);
+ if (fs == nullptr) {
+ ss << "Not found: '" << fs_name << "'";
+ return -ENOENT;
+ }
+
+ string var;
+ if (!cmd_getval(g_ceph_context, cmdmap, "var", var) || var.empty()) {
+ ss << "Invalid variable";
+ return -EINVAL;
+ }
+ string val;
+ string interr;
+ int64_t n = 0;
+ if (!cmd_getval(g_ceph_context, cmdmap, "val", val)) {
+ return -EINVAL;
+ }
+ // we got a string. see if it contains an int.
+ n = strict_strtoll(val.c_str(), 10, &interr);
+ if (var == "max_mds") {
+ // NOTE: see also "mds set_max_mds", which can modify the same field.
+ if (interr.length()) {
+ ss << interr;
+ return -EINVAL;
+ }
+
+ if (n <= 0) {
+ ss << "You must specify at least one MDS";
+ return -EINVAL;
+ }
+
+ if (!fs->mds_map.allows_multimds() && n > fs->mds_map.get_max_mds() &&
+ n > 1) {
+ ss << "multi-MDS clusters are not enabled; set 'allow_multimds' to enable";
+ return -EINVAL;
+ }
+ if (n > MAX_MDS) {
+ ss << "may not have more than " << MAX_MDS << " MDS ranks";
+ return -EINVAL;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [n](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_max_mds(n);
+ });
+ } else if (var == "inline_data") {
+ bool enable_inline = false;
+ int r = parse_bool(val, &enable_inline, ss);
+ if (r != 0) {
+ return r;
+ }
+
+ if (enable_inline) {
+ string confirm;
+ if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
+ confirm != "--yes-i-really-mean-it") {
+ ss << EXPERIMENTAL_WARNING;
+ return -EPERM;
+ }
+ ss << "inline data enabled";
+
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_inline_data_enabled(true);
+ });
+
+ // Update `compat`
+ CompatSet c = fsmap.get_compat();
+ c.incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
+ fsmap.update_compat(c);
+ } else {
+ ss << "inline data disabled";
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_inline_data_enabled(false);
+ });
+ }
+ } else if (var == "balancer") {
+ ss << "setting the metadata load balancer to " << val;
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [val](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_balancer(val);
+ });
+ return true;
+ } else if (var == "max_file_size") {
+ if (interr.length()) {
+ ss << var << " requires an integer value";
+ return -EINVAL;
+ }
+ if (n < CEPH_MIN_STRIPE_UNIT) {
+ ss << var << " must at least " << CEPH_MIN_STRIPE_UNIT;
+ return -ERANGE;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [n](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_max_filesize(n);
+ });
+ } else if (var == "allow_new_snaps") {
+ bool enable_snaps = false;
+ int r = parse_bool(val, &enable_snaps, ss);
+ if (r != 0) {
+ return r;
+ }
+
+ if (!enable_snaps) {
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.clear_snaps_allowed();
+ });
+ ss << "disabled new snapshots";
+ } else {
+ string confirm;
+ if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
+ confirm != "--yes-i-really-mean-it") {
+ ss << EXPERIMENTAL_WARNING;
+ return -EPERM;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_snaps_allowed();
+ });
+ ss << "enabled new snapshots";
+ }
+ } else if (var == "allow_multimds") {
+ bool enable_multimds = false;
+ int r = parse_bool(val, &enable_multimds, ss);
+ if (r != 0) {
+ return r;
+ }
+
+ if (!enable_multimds) {
+ fsmap.modify_filesystem(fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.clear_multimds_allowed();
+ });
+ ss << "disallowed increasing the cluster size past 1";
+ } else {
+ string confirm;
+ if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
+ confirm != "--yes-i-really-mean-it") {
+ ss << EXPERIMENTAL_WARNING;
+ return -EPERM;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_multimds_allowed();
+ });
+ ss << "enabled creation of more than 1 active MDS";
+ }
+ } else if (var == "allow_dirfrags") {
+ bool enable_dirfrags = false;
+ int r = parse_bool(val, &enable_dirfrags, ss);
+ if (r != 0) {
+ return r;
+ }
+
+ if (!enable_dirfrags) {
+ fsmap.modify_filesystem(fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.clear_dirfrags_allowed();
+ });
+ ss << "disallowed new directory fragmentation";
+ } else {
+ string confirm;
+ if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
+ confirm != "--yes-i-really-mean-it") {
+ ss << EXPERIMENTAL_WARNING;
+ return -EPERM;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_dirfrags_allowed();
+ });
+ ss << "enabled directory fragmentation";
+ }
+ } else if (var == "cluster_down") {
+ bool is_down = false;
+ int r = parse_bool(val, &is_down, ss);
+ if (r != 0) {
+ return r;
+ }
+
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [is_down](std::shared_ptr<Filesystem> fs)
+ {
+ if (is_down) {
+ fs->mds_map.set_flag(CEPH_MDSMAP_DOWN);
+ } else {
+ fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN);
+ }
+ });
+
+ ss << "marked " << (is_down ? "down" : "up");
+ } else {
+ ss << "unknown variable " << var;
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+};
+
+class AddDataPoolHandler : public FileSystemCommandHandler
+{
+ public:
+ AddDataPoolHandler()
+ : FileSystemCommandHandler("fs add_data_pool")
+ {}
+
+ int handle(
+ Monitor *mon,
+ FSMap &fsmap,
+ MonOpRequestRef op,
+ map<string, cmd_vartype> &cmdmap,
+ std::stringstream &ss) override
+ {
+ string poolname;
+ cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
+
+ std::string fs_name;
+ if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name)
+ || fs_name.empty()) {
+ ss << "Missing filesystem name";
+ return -EINVAL;
+ }
+
+ auto fs = fsmap.get_filesystem(fs_name);
+ if (fs == nullptr) {
+ ss << "Not found: '" << fs_name << "'";
+ return -ENOENT;
+ }
+
+ int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
+ if (poolid < 0) {
+ string err;
+ poolid = strict_strtol(poolname.c_str(), 10, &err);
+ if (err.length()) {
+ ss << "pool '" << poolname << "' does not exist";
+ return -ENOENT;
+ }
+ }
+
+ int r = _check_pool(mon->osdmon()->osdmap, poolid, false, &ss);
+ if (r != 0) {
+ return r;
+ }
+
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [poolid](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.add_data_pool(poolid);
+ });
+
+ ss << "added data pool " << poolid << " to fsmap";
+
+ return 0;
+ }
+};
+
+
+class RemoveDataPoolHandler : public FileSystemCommandHandler
+{
+ public:
+ RemoveDataPoolHandler()
+ : FileSystemCommandHandler("fs rm_data_pool")
+ {}
+
+ int handle(
+ Monitor *mon,
+ FSMap &fsmap,
+ MonOpRequestRef op,
+ map<string, cmd_vartype> &cmdmap,
+ std::stringstream &ss) override
+ {
+ string poolname;
+ cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
+
+ std::string fs_name;
+ if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name)
+ || fs_name.empty()) {
+ ss << "Missing filesystem name";
+ return -EINVAL;
+ }
+
+ auto fs = fsmap.get_filesystem(fs_name);
+ if (fs == nullptr) {
+ ss << "Not found: '" << fs_name << "'";
+ return -ENOENT;
+ }
+
+ int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
+ if (poolid < 0) {
+ string err;
+ poolid = strict_strtol(poolname.c_str(), 10, &err);
+ if (err.length()) {
+ ss << "pool '" << poolname << "' does not exist";
+ return -ENOENT;
+ } else if (poolid < 0) {
+ ss << "invalid pool id '" << poolid << "'";
+ return -EINVAL;
+ }
+ }
+
+ assert(poolid >= 0); // Checked by parsing code above
+
+ if (fs->mds_map.get_first_data_pool() == poolid) {
+ ss << "cannot remove default data pool";
+ return -EINVAL;
+ }
+
+
+ int r = 0;
+ fsmap.modify_filesystem(fs->fscid,
+ [&r, poolid](std::shared_ptr<Filesystem> fs)
+ {
+ r = fs->mds_map.remove_data_pool(poolid);
+ });
+ if (r == -ENOENT) {
+ // It was already removed, succeed in silence
+ return 0;
+ } else if (r == 0) {
+ // We removed it, succeed
+ ss << "removed data pool " << poolid << " from fsmap";
+ return 0;
+ } else {
+ // Unexpected error, bubble up
+ return r;
+ }
+ }
+};
+
+
+/**
+ * For commands that refer to a particular filesystem,
+ * enable wrapping to implement the legacy version of
+ * the command (like "mds add_data_pool" vs "fs add_data_pool")
+ *
+ * The wrapped handler must expect a fs_name argument in
+ * its command map.
+ */
+template<typename T>
+class LegacyHandler : public T
+{
+ std::string legacy_prefix;
+
+ public:
+ LegacyHandler(const std::string &new_prefix)
+ : T()
+ {
+ legacy_prefix = new_prefix;
+ }
+
+ std::string const &get_prefix() override {return legacy_prefix;}
+
+ int handle(
+ Monitor *mon,
+ FSMap &fsmap,
+ MonOpRequestRef op,
+ map<string, cmd_vartype> &cmdmap,
+ std::stringstream &ss) override
+ {
+ auto fs = fsmap.get_legacy_filesystem();
+ if (fs == nullptr) {
+ ss << "No filesystem configured";
+ return -ENOENT;
+ }
+ std::map<string, cmd_vartype> modified = cmdmap;
+ modified["fs_name"] = fs->mds_map.get_fs_name();
+ return T::handle(mon, fsmap, op, modified, ss);
+ }
+};
+
+
+std::list<std::shared_ptr<FileSystemCommandHandler> > FileSystemCommandHandler::load()
+{
+ std::list<std::shared_ptr<FileSystemCommandHandler> > handlers;
+
+ handlers.push_back(std::make_shared<SetHandler>());
+ handlers.push_back(std::make_shared<LegacyHandler<SetHandler> >("mds set"));
+ handlers.push_back(std::make_shared<FlagSetHandler>());
+ handlers.push_back(std::make_shared<AddDataPoolHandler>());
+ handlers.push_back(std::make_shared<LegacyHandler<AddDataPoolHandler> >(
+ "mds add_data_pool"));
+ handlers.push_back(std::make_shared<RemoveDataPoolHandler>());
+ handlers.push_back(std::make_shared<LegacyHandler<RemoveDataPoolHandler> >(
+ "mds remove_data_pool"));
+ handlers.push_back(std::make_shared<LegacyHandler<RemoveDataPoolHandler> >(
+ "mds rm_data_pool"));
+ handlers.push_back(std::make_shared<FsNewHandler>());
+
+ return handlers;
+}
+
+int FileSystemCommandHandler::parse_bool(
+ const std::string &bool_str,
+ bool *result,
+ std::ostream &ss)
+{
+ assert(result != nullptr);
+
+ string interr;
+ int64_t n = strict_strtoll(bool_str.c_str(), 10, &interr);
+
+ if (bool_str == "false" || bool_str == "no"
+ || (interr.length() == 0 && n == 0)) {
+ *result = false;
+ return 0;
+ } else if (bool_str == "true" || bool_str == "yes"
+ || (interr.length() == 0 && n == 1)) {
+ *result = true;
+ return 0;
+ } else {
+ ss << "value must be false|no|0 or true|yes|1";
+ return -EINVAL;
+ }
+}
+
+int FileSystemCommandHandler::_check_pool(
+ OSDMap &osd_map,
+ const int64_t pool_id,
+ bool metadata,
+ std::stringstream *ss) const
+{
+ assert(ss != NULL);
+
+ const pg_pool_t *pool = osd_map.get_pg_pool(pool_id);
+ if (!pool) {
+ *ss << "pool id '" << pool_id << "' does not exist";
+ return -ENOENT;
+ }
+
+ const string& pool_name = osd_map.get_pool_name(pool_id);
+
+ if (pool->is_erasure() && metadata) {
+ *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
+ << " is an erasure-coded pool. Use of erasure-coded pools"
+ << " for CephFS metadata is not permitted";
+ return -EINVAL;
+ } else if (pool->is_erasure() && !pool->is_hacky_ecoverwrites()) {
+ // non-overwriteable EC pools are only acceptable with a cache tier overlay
+ if (!pool->has_tiers() || !pool->has_read_tier() || !pool->has_write_tier()) {
+ *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
+ << " is an erasure-coded pool, with no overwrite support";
+ return -EINVAL;
+ }
+
+ // That cache tier overlay must be writeback, not readonly (it's the
+ // write operations like modify+truncate we care about support for)
+ const pg_pool_t *write_tier = osd_map.get_pg_pool(
+ pool->write_tier);
+ assert(write_tier != NULL); // OSDMonitor shouldn't allow DNE tier
+ if (write_tier->cache_mode == pg_pool_t::CACHEMODE_FORWARD
+ || write_tier->cache_mode == pg_pool_t::CACHEMODE_READONLY) {
+ *ss << "EC pool '" << pool_name << "' has a write tier ("
+ << osd_map.get_pool_name(pool->write_tier)
+ << ") that is configured "
+ "to forward writes. Use a cache mode such as 'writeback' for "
+ "CephFS";
+ return -EINVAL;
+ }
+ }
+
+ if (pool->is_tier()) {
+ *ss << " pool '" << pool_name << "' (id '" << pool_id
+ << "') is already in use as a cache tier.";
+ return -EINVAL;
+ }
+
+ // Nothing special about this pool, so it is permissible
+ return 0;
+}
+
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 Red Hat Ltd
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef FS_COMMANDS_H_
+#define FS_COMMANDS_H_
+
+#include "Monitor.h"
+
+#include "osd/OSDMap.h"
+#include "mds/FSMap.h"
+
+#include <string>
+#include <sstream>
+
+class FileSystemCommandHandler
+{
+protected:
+ std::string prefix;
+
+ /**
+ * Parse true|yes|1 style boolean string from `bool_str`
+ * `result` must be non-null.
+ * `ss` will be populated with error message on error.
+ *
+ * @return 0 on success, else -EINVAL
+ */
+ int parse_bool(
+ const std::string &bool_str,
+ bool *result,
+ std::ostream &ss);
+
+ /**
+ * Return 0 if the pool is suitable for use with CephFS, or
+ * in case of errors return a negative error code, and populate
+ * the passed stringstream with an explanation.
+ *
+ * @param metadata whether the pool will be for metadata (stricter checks)
+ */
+ int _check_pool(
+ OSDMap &osd_map,
+ const int64_t pool_id,
+ bool metadata,
+ std::stringstream *ss) const;
+
+ virtual std::string const &get_prefix() {return prefix;}
+
+public:
+ FileSystemCommandHandler(const std::string &prefix_)
+ : prefix(prefix_)
+ {}
+
+ virtual ~FileSystemCommandHandler()
+ {}
+
+ bool can_handle(std::string const &prefix_)
+ {
+ return get_prefix() == prefix_;
+ }
+
+ static std::list<std::shared_ptr<FileSystemCommandHandler> > load();
+
+ virtual int handle(
+ Monitor *mon,
+ FSMap &fsmap,
+ MonOpRequestRef op,
+ map<string, cmd_vartype> &cmdmap,
+ std::stringstream &ss) = 0;
+};
+
+#endif
#include <boost/utility.hpp>
#include "MDSMonitor.h"
+#include "FSCommands.h"
#include "Monitor.h"
#include "MonitorDBStore.h"
#include "OSDMonitor.h"
return cmd_getval(cct, cmdmap, k, (int64_t&)val);
}
-static const string EXPERIMENTAL_WARNING("Warning! This feature is experimental."
-"It may cause problems up to and including data loss."
-"Consult the documentation at ceph.com, and if unsure, do not proceed."
-"Add --yes-i-really-mean-it if you are certain.");
-
static const string MDS_METADATA_PREFIX("mds_metadata");
f->dump_unsigned("mdsmap_last_committed", get_last_committed());
}
-class FileSystemCommandHandler
-{
-protected:
- std::string prefix;
-
- /**
- * Parse true|yes|1 style boolean string from `bool_str`
- * `result` must be non-null.
- * `ss` will be populated with error message on error.
- *
- * @return 0 on success, else -EINVAL
- */
- int parse_bool(
- const std::string &bool_str,
- bool *result,
- std::ostream &ss)
- {
- assert(result != nullptr);
-
- string interr;
- int64_t n = strict_strtoll(bool_str.c_str(), 10, &interr);
-
- if (bool_str == "false" || bool_str == "no"
- || (interr.length() == 0 && n == 0)) {
- *result = false;
- return 0;
- } else if (bool_str == "true" || bool_str == "yes"
- || (interr.length() == 0 && n == 1)) {
- *result = true;
- return 0;
- } else {
- ss << "value must be false|no|0 or true|yes|1";
- return -EINVAL;
- }
- }
-
- /**
- * Return 0 if the pool is suitable for use with CephFS, or
- * in case of errors return a negative error code, and populate
- * the passed stringstream with an explanation.
- */
- int _check_pool(
- OSDMap &osd_map,
- const int64_t pool_id,
- std::stringstream *ss) const
- {
- assert(ss != NULL);
-
- const pg_pool_t *pool = osd_map.get_pg_pool(pool_id);
- if (!pool) {
- *ss << "pool id '" << pool_id << "' does not exist";
- return -ENOENT;
- }
-
- const string& pool_name = osd_map.get_pool_name(pool_id);
-
- if (pool->is_erasure()) {
- // EC pools are only acceptable with a cache tier overlay
- if (!pool->has_tiers() || !pool->has_read_tier() || !pool->has_write_tier()) {
- *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
- << " is an erasure-code pool";
- return -EINVAL;
- }
-
- // That cache tier overlay must be writeback, not readonly (it's the
- // write operations like modify+truncate we care about support for)
- const pg_pool_t *write_tier = osd_map.get_pg_pool(
- pool->write_tier);
- assert(write_tier != NULL); // OSDMonitor shouldn't allow DNE tier
- if (write_tier->cache_mode == pg_pool_t::CACHEMODE_FORWARD
- || write_tier->cache_mode == pg_pool_t::CACHEMODE_READONLY) {
- *ss << "EC pool '" << pool_name << "' has a write tier ("
- << osd_map.get_pool_name(pool->write_tier)
- << ") that is configured "
- "to forward writes. Use a cache mode such as 'writeback' for "
- "CephFS";
- return -EINVAL;
- }
- }
-
- if (pool->is_tier()) {
- *ss << " pool '" << pool_name << "' (id '" << pool_id
- << "') is already in use as a cache tier.";
- return -EINVAL;
- }
-
- // Nothing special about this pool, so it is permissible
- return 0;
- }
-
- virtual std::string const &get_prefix() {return prefix;}
-
-public:
- FileSystemCommandHandler(const std::string &prefix_)
- : prefix(prefix_)
- {
-
- }
- virtual ~FileSystemCommandHandler()
- {
- }
-
- bool can_handle(std::string const &prefix_)
- {
- return get_prefix() == prefix_;
- }
-
- virtual int handle(
- Monitor *mon,
- FSMap &fsmap,
- MonOpRequestRef op,
- map<string, cmd_vartype> &cmdmap,
- std::stringstream &ss) = 0;
-};
-
bool MDSMonitor::preprocess_command(MonOpRequestRef op)
{
op->mark_mdsmon_event(__func__);
}
}
-
-class FlagSetHandler : public FileSystemCommandHandler
-{
- public:
- FlagSetHandler()
- : FileSystemCommandHandler("fs flag set")
- {
- }
-
- int handle(
- Monitor *mon,
- FSMap &fsmap,
- MonOpRequestRef op,
- map<string, cmd_vartype> &cmdmap,
- std::stringstream &ss) override
- {
- string flag_name;
- cmd_getval(g_ceph_context, cmdmap, "flag_name", flag_name);
-
- string flag_val;
- cmd_getval(g_ceph_context, cmdmap, "val", flag_val);
-
- string confirm;
- cmd_getval(g_ceph_context, cmdmap, "confirm", confirm);
-
- if (flag_name == "enable_multiple") {
- bool flag_bool = false;
- int r = parse_bool(flag_val, &flag_bool, ss);
- if (r != 0) {
- ss << "Invalid boolean value '" << flag_val << "'";
- return r;
- }
-
- bool jewel = mon->get_quorum_con_features() & CEPH_FEATURE_SERVER_JEWEL;
- if (flag_bool && !jewel) {
- ss << "Multiple-filesystems are forbidden until all mons are updated";
- return -EINVAL;
- }
- if (confirm != "--yes-i-really-mean-it") {
- ss << EXPERIMENTAL_WARNING;
- }
- fsmap.set_enable_multiple(flag_bool);
- return 0;
- } else {
- ss << "Unknown flag '" << flag_name << "'";
- return -EINVAL;
- }
- }
-};
-
-class FsNewHandler : public FileSystemCommandHandler
-{
- public:
- FsNewHandler()
- : FileSystemCommandHandler("fs new")
- {
- }
-
- int handle(
- Monitor *mon,
- FSMap &fsmap,
- MonOpRequestRef op,
- map<string, cmd_vartype> &cmdmap,
- std::stringstream &ss)
- {
- string metadata_name;
- cmd_getval(g_ceph_context, cmdmap, "metadata", metadata_name);
- int64_t metadata = mon->osdmon()->osdmap.lookup_pg_pool_name(metadata_name);
- if (metadata < 0) {
- ss << "pool '" << metadata_name << "' does not exist";
- return -ENOENT;
- }
-
- string data_name;
- cmd_getval(g_ceph_context, cmdmap, "data", data_name);
- int64_t data = mon->osdmon()->osdmap.lookup_pg_pool_name(data_name);
- if (data < 0) {
- ss << "pool '" << data_name << "' does not exist";
- return -ENOENT;
- } else if (data == 0) {
- ss << "pool '" << data_name << "' has id 0, which CephFS does not allow. Use another pool or recreate it to get a non-zero pool id.";
- return -EINVAL;
- }
-
- string fs_name;
- cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
- if (fs_name.empty()) {
- // Ensure fs name is not empty so that we can implement
- // commmands that refer to FS by name in future.
- ss << "Filesystem name may not be empty";
- return -EINVAL;
- } else if (fsmap.get_filesystem(fs_name)) {
- auto fs = fsmap.get_filesystem(fs_name);
- if (*(fs->mds_map.get_data_pools().begin()) == data
- && fs->mds_map.get_metadata_pool() == metadata) {
- // Identical FS created already, this is a no-op
- ss << "filesystem '" << fs_name << "' already exists";
- return 0;
- } else {
- ss << "filesystem already exists with name '" << fs_name << "'";
- return -EEXIST;
- }
- }
-
- string force;
- cmd_getval(g_ceph_context,cmdmap, "force", force);
- int64_t metadata_num_objects = mon->pgmon()->pg_map.pg_pool_sum[metadata].stats.sum.num_objects;
- if (force != "--force" && metadata_num_objects > 0) {
- ss << "pool '" << metadata_name
- << "' already contains some objects. Use an empty pool instead.";
- return -EINVAL;
- }
-
- if (fsmap.filesystem_count() > 0
- && !fsmap.get_enable_multiple()) {
- ss << "Creation of multiple filesystems is disabled. To enable "
- "this experimental feature, use 'ceph fs flag set enable_multiple "
- "true'";
- return -EINVAL;
- }
-
- pg_pool_t const *data_pool = mon->osdmon()->osdmap.get_pg_pool(data);
- assert(data_pool != NULL); // Checked it existed above
- pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata);
- assert(metadata_pool != NULL); // Checked it existed above
-
- // we must make these checks before we even allow ourselves to *think*
- // about requesting a proposal to the osdmonitor and bail out now if
- // we believe we must. bailing out *after* we request the proposal is
- // bad business as we could have changed the osdmon's state and ending up
- // returning an error to the user.
- int r = _check_pool(mon->osdmon()->osdmap, data, &ss);
- if (r < 0) {
- return r;
- }
-
- r = _check_pool(mon->osdmon()->osdmap, metadata, &ss);
- if (r < 0) {
- return r;
- }
-
- // All checks passed, go ahead and create.
- fsmap.create_filesystem(fs_name, metadata, data,
- mon->get_quorum_con_features());
- ss << "new fs with metadata pool " << metadata << " and data pool " << data;
- return 0;
- }
-};
-
-
/**
* Handle a command for creating or removing a filesystem.
*
return relevant_fsmap->parse_role(role_str, role, ss);
}
-
-class SetHandler : public FileSystemCommandHandler
-{
-public:
- SetHandler()
- : FileSystemCommandHandler("fs set")
- {}
-
- int handle(
- Monitor *mon,
- FSMap &fsmap,
- MonOpRequestRef op,
- map<string, cmd_vartype> &cmdmap,
- std::stringstream &ss) override
- {
- std::string fs_name;
- if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name) || fs_name.empty()) {
- ss << "Missing filesystem name";
- return -EINVAL;
- }
-
- auto fs = fsmap.get_filesystem(fs_name);
- if (fs == nullptr) {
- ss << "Not found: '" << fs_name << "'";
- return -ENOENT;
- }
-
- string var;
- if (!cmd_getval(g_ceph_context, cmdmap, "var", var) || var.empty()) {
- ss << "Invalid variable";
- return -EINVAL;
- }
- string val;
- string interr;
- int64_t n = 0;
- if (!cmd_getval(g_ceph_context, cmdmap, "val", val)) {
- return -EINVAL;
- }
- // we got a string. see if it contains an int.
- n = strict_strtoll(val.c_str(), 10, &interr);
- if (var == "max_mds") {
- // NOTE: see also "mds set_max_mds", which can modify the same field.
- if (interr.length()) {
- ss << interr;
- return -EINVAL;
- }
-
- if (n <= 0) {
- ss << "You must specify at least one MDS";
- return -EINVAL;
- }
-
- if (!fs->mds_map.allows_multimds() && n > fs->mds_map.get_max_mds() &&
- n > 1) {
- ss << "multi-MDS clusters are not enabled; set 'allow_multimds' to enable";
- return -EINVAL;
- }
- if (n > MAX_MDS) {
- ss << "may not have more than " << MAX_MDS << " MDS ranks";
- return -EINVAL;
- }
- fsmap.modify_filesystem(
- fs->fscid,
- [n](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.set_max_mds(n);
- });
- } else if (var == "inline_data") {
- bool enable_inline = false;
- int r = parse_bool(val, &enable_inline, ss);
- if (r != 0) {
- return r;
- }
-
- if (enable_inline) {
- string confirm;
- if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
- confirm != "--yes-i-really-mean-it") {
- ss << EXPERIMENTAL_WARNING;
- return -EPERM;
- }
- ss << "inline data enabled";
-
- fsmap.modify_filesystem(
- fs->fscid,
- [](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.set_inline_data_enabled(true);
- });
-
- // Update `compat`
- CompatSet c = fsmap.get_compat();
- c.incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
- fsmap.update_compat(c);
- } else {
- ss << "inline data disabled";
- fsmap.modify_filesystem(
- fs->fscid,
- [](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.set_inline_data_enabled(false);
- });
- }
- } else if (var == "balancer") {
- ss << "setting the metadata load balancer to " << val;
- fsmap.modify_filesystem(
- fs->fscid,
- [val](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.set_balancer(val);
- });
- return true;
- } else if (var == "max_file_size") {
- if (interr.length()) {
- ss << var << " requires an integer value";
- return -EINVAL;
- }
- if (n < CEPH_MIN_STRIPE_UNIT) {
- ss << var << " must at least " << CEPH_MIN_STRIPE_UNIT;
- return -ERANGE;
- }
- fsmap.modify_filesystem(
- fs->fscid,
- [n](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.set_max_filesize(n);
- });
- } else if (var == "allow_new_snaps") {
- bool enable_snaps = false;
- int r = parse_bool(val, &enable_snaps, ss);
- if (r != 0) {
- return r;
- }
-
- if (!enable_snaps) {
- fsmap.modify_filesystem(
- fs->fscid,
- [](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.clear_snaps_allowed();
- });
- ss << "disabled new snapshots";
- } else {
- string confirm;
- if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
- confirm != "--yes-i-really-mean-it") {
- ss << EXPERIMENTAL_WARNING;
- return -EPERM;
- }
- fsmap.modify_filesystem(
- fs->fscid,
- [](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.set_snaps_allowed();
- });
- ss << "enabled new snapshots";
- }
- } else if (var == "allow_multimds") {
- bool enable_multimds = false;
- int r = parse_bool(val, &enable_multimds, ss);
- if (r != 0) {
- return r;
- }
-
- if (!enable_multimds) {
- fsmap.modify_filesystem(fs->fscid,
- [](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.clear_multimds_allowed();
- });
- ss << "disallowed increasing the cluster size past 1";
- } else {
- string confirm;
- if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
- confirm != "--yes-i-really-mean-it") {
- ss << EXPERIMENTAL_WARNING;
- return -EPERM;
- }
- fsmap.modify_filesystem(
- fs->fscid,
- [](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.set_multimds_allowed();
- });
- ss << "enabled creation of more than 1 active MDS";
- }
- } else if (var == "allow_dirfrags") {
- bool enable_dirfrags = false;
- int r = parse_bool(val, &enable_dirfrags, ss);
- if (r != 0) {
- return r;
- }
-
- if (!enable_dirfrags) {
- fsmap.modify_filesystem(fs->fscid,
- [](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.clear_dirfrags_allowed();
- });
- ss << "disallowed new directory fragmentation";
- } else {
- string confirm;
- if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
- confirm != "--yes-i-really-mean-it") {
- ss << EXPERIMENTAL_WARNING;
- return -EPERM;
- }
- fsmap.modify_filesystem(
- fs->fscid,
- [](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.set_dirfrags_allowed();
- });
- ss << "enabled directory fragmentation";
- }
- } else if (var == "cluster_down") {
- bool is_down = false;
- int r = parse_bool(val, &is_down, ss);
- if (r != 0) {
- return r;
- }
-
- fsmap.modify_filesystem(
- fs->fscid,
- [is_down](std::shared_ptr<Filesystem> fs)
- {
- if (is_down) {
- fs->mds_map.set_flag(CEPH_MDSMAP_DOWN);
- } else {
- fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN);
- }
- });
-
- ss << "marked " << (is_down ? "down" : "up");
- } else {
- ss << "unknown variable " << var;
- return -EINVAL;
- }
-
- return 0;
- }
-};
-
-class AddDataPoolHandler : public FileSystemCommandHandler
-{
- public:
- AddDataPoolHandler()
- : FileSystemCommandHandler("fs add_data_pool")
- {}
-
- int handle(
- Monitor *mon,
- FSMap &fsmap,
- MonOpRequestRef op,
- map<string, cmd_vartype> &cmdmap,
- std::stringstream &ss) override
- {
- string poolname;
- cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
-
- std::string fs_name;
- if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name)
- || fs_name.empty()) {
- ss << "Missing filesystem name";
- return -EINVAL;
- }
-
- auto fs = fsmap.get_filesystem(fs_name);
- if (fs == nullptr) {
- ss << "Not found: '" << fs_name << "'";
- return -ENOENT;
- }
-
- int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
- if (poolid < 0) {
- string err;
- poolid = strict_strtol(poolname.c_str(), 10, &err);
- if (err.length()) {
- ss << "pool '" << poolname << "' does not exist";
- return -ENOENT;
- }
- }
-
- int r = _check_pool(mon->osdmon()->osdmap, poolid, &ss);
- if (r != 0) {
- return r;
- }
-
- fsmap.modify_filesystem(
- fs->fscid,
- [poolid](std::shared_ptr<Filesystem> fs)
- {
- fs->mds_map.add_data_pool(poolid);
- });
-
- ss << "added data pool " << poolid << " to fsmap";
-
- return 0;
- }
-};
-
-
-class RemoveDataPoolHandler : public FileSystemCommandHandler
-{
- public:
- RemoveDataPoolHandler()
- : FileSystemCommandHandler("fs rm_data_pool")
- {}
-
- int handle(
- Monitor *mon,
- FSMap &fsmap,
- MonOpRequestRef op,
- map<string, cmd_vartype> &cmdmap,
- std::stringstream &ss) override
- {
- string poolname;
- cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
-
- std::string fs_name;
- if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name)
- || fs_name.empty()) {
- ss << "Missing filesystem name";
- return -EINVAL;
- }
-
- auto fs = fsmap.get_filesystem(fs_name);
- if (fs == nullptr) {
- ss << "Not found: '" << fs_name << "'";
- return -ENOENT;
- }
-
- int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
- if (poolid < 0) {
- string err;
- poolid = strict_strtol(poolname.c_str(), 10, &err);
- if (err.length()) {
- ss << "pool '" << poolname << "' does not exist";
- return -ENOENT;
- } else if (poolid < 0) {
- ss << "invalid pool id '" << poolid << "'";
- return -EINVAL;
- }
- }
-
- assert(poolid >= 0); // Checked by parsing code above
-
- if (fs->mds_map.get_first_data_pool() == poolid) {
- ss << "cannot remove default data pool";
- return -EINVAL;
- }
-
-
- int r = 0;
- fsmap.modify_filesystem(fs->fscid,
- [&r, poolid](std::shared_ptr<Filesystem> fs)
- {
- r = fs->mds_map.remove_data_pool(poolid);
- });
- if (r == -ENOENT) {
- // It was already removed, succeed in silence
- return 0;
- } else if (r == 0) {
- // We removed it, succeed
- ss << "removed data pool " << poolid << " from fsmap";
- return 0;
- } else {
- // Unexpected error, bubble up
- return r;
- }
- }
-};
-
-
-/**
- * For commands that refer to a particular filesystem,
- * enable wrapping to implement the legacy version of
- * the command (like "mds add_data_pool" vs "fs add_data_pool")
- *
- * The wrapped handler must expect a fs_name argument in
- * its command map.
- */
-template<typename T>
-class LegacyHandler : public T
-{
- std::string legacy_prefix;
-
- public:
- LegacyHandler(const std::string &new_prefix)
- : T()
- {
- legacy_prefix = new_prefix;
- }
-
- std::string const &get_prefix() override {return legacy_prefix;}
-
- int handle(
- Monitor *mon,
- FSMap &fsmap,
- MonOpRequestRef op,
- map<string, cmd_vartype> &cmdmap,
- std::stringstream &ss) override
- {
- auto fs = fsmap.get_legacy_filesystem();
- if (fs == nullptr) {
- ss << "No filesystem configured";
- return -ENOENT;
- }
- std::map<string, cmd_vartype> modified = cmdmap;
- modified["fs_name"] = fs->mds_map.get_fs_name();
- return T::handle(mon, fsmap, op, modified, ss);
- }
-};
-
int MDSMonitor::filesystem_command(
MonOpRequestRef op,
std::string const &prefix,
MDSMonitor::MDSMonitor(Monitor *mn, Paxos *p, string service_name)
: PaxosService(mn, p, service_name)
{
- handlers.push_back(std::make_shared<SetHandler>());
- handlers.push_back(std::make_shared<LegacyHandler<SetHandler> >("mds set"));
- handlers.push_back(std::make_shared<FlagSetHandler>());
- handlers.push_back(std::make_shared<AddDataPoolHandler>());
- handlers.push_back(std::make_shared<LegacyHandler<AddDataPoolHandler> >(
- "mds add_data_pool"));
- handlers.push_back(std::make_shared<RemoveDataPoolHandler>());
- handlers.push_back(std::make_shared<LegacyHandler<RemoveDataPoolHandler> >(
- "mds remove_data_pool"));
- handlers.push_back(std::make_shared<LegacyHandler<RemoveDataPoolHandler> >(
- "mds rm_data_pool"));
- handlers.push_back(std::make_shared<FsNewHandler>());
+ handlers = FileSystemCommandHandler::load();
}
void MDSMonitor::on_restart()