Taking the cluster down rapidly for deletion or disaster recovery
-----------------------------------------------------------------
-To allow rapidly deleting a file system (for testing) or to quickly bring MDS
-daemons down, the operator may also set a flag to prevent standbys from
-activating on the file system. This is done using the ``joinable`` flag:
+To allow rapidly deleting a file system (for testing) or to quickly bring the
+file system and MDS daemons down, use the ``fs fail`` command:
+
+::
+
+ fs fail <fs_name>
+
+This command marks the file system as not joinable (it clears the ``joinable``
+flag), which prevents standbys from activating on the file system, and marks
+all of its MDS ranks as failed.
+
+This process can also be done manually by doing the following:
::
Once all ranks are inactive, the file system may also be deleted or left in
this state for other purposes (perhaps disaster recovery).
+To bring the cluster back up, simply set the ``joinable`` flag back to true:
+
+::
+
+ fs set <fs_name> joinable true
+
Daemons
-------
}
};
+/**
+ * Handler for the "fs fail" monitor command.
+ *
+ * Marks the named file system as not joinable and then fails every MDS GID
+ * currently listed in that file system's MDSMap.
+ */
+class FailHandler : public FileSystemCommandHandler
+{
+  public:
+  FailHandler()
+    : FileSystemCommandHandler("fs fail")
+  {
+  }
+
+  /**
+   * Execute "fs fail <fs_name>".
+   *
+   * @param mon     monitor providing access to the OSD and MDS monitor services
+   * @param fsmap   FSMap that is modified in place
+   * @param op      originating request; re-queued if the OSD monitor is not
+   *                writeable yet
+   * @param cmdmap  parsed command arguments; "fs_name" is required
+   * @param ss      receives the human-readable status or error message
+   * @return 0 on success, -EAGAIN when a retry was scheduled, -EINVAL when
+   *         fs_name is missing or empty, -ENOENT when no such file system
+   *         exists
+   */
+  int handle(
+      Monitor* mon,
+      FSMap& fsmap,
+      MonOpRequestRef op,
+      const cmdmap_t& cmdmap,
+      std::stringstream& ss) override
+  {
+    if (!mon->osdmon()->is_writeable()) {
+      // not allowed to write yet, so retry when we can
+      mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
+      return -EAGAIN;
+    }
+
+    std::string fs_name;
+    if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name) || fs_name.empty()) {
+      ss << "Missing filesystem name";
+      return -EINVAL;
+    }
+
+    auto fs = fsmap.get_filesystem(fs_name);
+    if (fs == nullptr) {
+      ss << "Not found: '" << fs_name << "'";
+      return -ENOENT;
+    }
+
+    // Stop standbys from taking over the ranks that are about to be failed.
+    auto mark_not_joinable = [](auto target_fs) {
+      target_fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
+    };
+    fsmap.modify_filesystem(fs->fscid, std::move(mark_not_joinable));
+
+    // Snapshot the GIDs before failing anything.
+    // NOTE(review): fail_mds_gid presumably mutates the MDS info being
+    // iterated here, hence the copy -- confirm against MDSMonitor.
+    std::vector<mds_gid_t> to_fail;
+    for (const auto& p : fs->mds_map.get_mds_info()) {
+      to_fail.push_back(p.first);
+    }
+
+    for (const auto& gid : to_fail) {
+      mon->mdsmon()->fail_mds_gid(fsmap, gid);
+    }
+    // Only ask the OSD monitor to propose when at least one daemon was failed;
+    // otherwise this handler made no call that could have queued OSD monitor
+    // changes, and an empty proposal would be pointless.
+    if (!to_fail.empty()) {
+      mon->osdmon()->propose_pending();
+    }
+
+    ss << fs_name;
+    ss << " marked not joinable; MDS cannot join the cluster. All MDS ranks marked failed.";
+
+    return 0;
+  }
+};
+
+
class FsNewHandler : public FileSystemCommandHandler
{
public:
// Check that no MDS daemons are active
if (fs->mds_map.get_num_up_mds() > 0) {
- ss << "all MDS daemons must be inactive before removing filesystem";
+ ss << "all MDS daemons must be inactive/failed before removing filesystem. See `ceph fs fail`.";
return -EINVAL;
}
std::list<std::shared_ptr<FileSystemCommandHandler> > handlers;
handlers.push_back(std::make_shared<SetHandler>());
+ handlers.push_back(std::make_shared<FailHandler>());
handlers.push_back(std::make_shared<FlagSetHandler>());
handlers.push_back(std::make_shared<AddDataPoolHandler>(paxos));
handlers.push_back(std::make_shared<RemoveDataPoolHandler>());
"name=allow_dangerous_metadata_overlay,type=CephBool,req=false", \
"make new filesystem using named pools <metadata> and <data>", \
"fs", "rw")
+/* "fs fail <fs_name>": takes a single CephString argument, fs_name. */
+COMMAND("fs fail " \
+ "name=fs_name,type=CephString ", \
+ "bring the file system down and all of its ranks", \
+ "fs", "rw")
COMMAND("fs rm " \
"name=fs_name,type=CephString " \
"name=yes_i_really_mean_it,type=CephBool,req=false", \