From: Patrick Donnelly Date: Thu, 7 Nov 2019 22:12:03 +0000 (-0800) Subject: mon/MDSMonitor: warn when creating fs with default EC data pool X-Git-Tag: v14.2.8~64^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1ee9f2cdf8eef7180061a3a6c87ceb75a838d8f8;p=ceph.git mon/MDSMonitor: warn when creating fs with default EC data pool Rationale can be found in [1]. Point is that EC pools incur a significant performance penalty when dealing with small files and xattr updates. This is because _every_ inode has a corresponding data pool object with backtrace information stored in its xattr. [1] doc/cephfs/createfs.rst Fixes: https://tracker.ceph.com/issues/42450 Signed-off-by: Patrick Donnelly (cherry picked from commit 3e0aee5ed68b7b4018fb03d5160f3f19da0f6458) --- diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 0cb0f9a994f8..d45afb840724 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -238,12 +238,12 @@ class FsNewHandler : public FileSystemCommandHandler pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata); ceph_assert(metadata_pool != NULL); // Checked it existed above - int r = _check_pool(mon->osdmon()->osdmap, data, false, force, &ss); + int r = _check_pool(mon->osdmon()->osdmap, data, POOL_DATA_DEFAULT, force, &ss); if (r < 0) { return r; } - r = _check_pool(mon->osdmon()->osdmap, metadata, true, force, &ss); + r = _check_pool(mon->osdmon()->osdmap, metadata, POOL_METADATA, force, &ss); if (r < 0) { return r; } @@ -679,7 +679,7 @@ class AddDataPoolHandler : public FileSystemCommandHandler } } - int r = _check_pool(mon->osdmon()->osdmap, poolid, false, false, &ss); + int r = _check_pool(mon->osdmon()->osdmap, poolid, POOL_DATA_EXTRA, false, &ss); if (r != 0) { return r; } @@ -986,7 +986,7 @@ FileSystemCommandHandler::load(Paxos *paxos) int FileSystemCommandHandler::_check_pool( OSDMap &osd_map, const int64_t pool_id, - bool metadata, + int type, bool force, std::stringstream *ss) const { @@ -1000,32 +1000,41 @@ int FileSystemCommandHandler::_check_pool( const string& pool_name = osd_map.get_pool_name(pool_id); - if (pool->is_erasure() && metadata) { + if (pool->is_erasure()) { + if (type == POOL_METADATA) { *ss << "pool '" << pool_name << "' (id '" << pool_id << "')" << " is an erasure-coded pool. Use of erasure-coded pools" << " for CephFS metadata is not permitted"; - return -EINVAL; - } else if (pool->is_erasure() && !pool->allows_ecoverwrites()) { - // non-overwriteable EC pools are only acceptable with a cache tier overlay - if (!pool->has_tiers() || !pool->has_read_tier() || !pool->has_write_tier()) { + return -EINVAL; + } else if (type == POOL_DATA_DEFAULT && !force) { *ss << "pool '" << pool_name << "' (id '" << pool_id << "')" - << " is an erasure-coded pool, with no overwrite support"; + " is an erasure-coded pool." + " Use of an EC pool for the default data pool is discouraged;" + " see the online CephFS documentation for more information." + " Use --force to override."; return -EINVAL; - } + } else if (!pool->allows_ecoverwrites()) { + // non-overwriteable EC pools are only acceptable with a cache tier overlay + if (!pool->has_tiers() || !pool->has_read_tier() || !pool->has_write_tier()) { + *ss << "pool '" << pool_name << "' (id '" << pool_id << "')" + << " is an erasure-coded pool, with no overwrite support"; + return -EINVAL; + } - // That cache tier overlay must be writeback, not readonly (it's the - // write operations like modify+truncate we care about support for) - const pg_pool_t *write_tier = osd_map.get_pg_pool( - pool->write_tier); - ceph_assert(write_tier != NULL); // OSDMonitor shouldn't allow DNE tier - if (write_tier->cache_mode == pg_pool_t::CACHEMODE_FORWARD - || write_tier->cache_mode == pg_pool_t::CACHEMODE_READONLY) { - *ss << "EC pool '" << pool_name << "' has a write tier (" - << osd_map.get_pool_name(pool->write_tier) - << ") that is configured " - "to forward writes. Use a cache mode such as 'writeback' for " - "CephFS"; - return -EINVAL; + // That cache tier overlay must be writeback, not readonly (it's the + // write operations like modify+truncate we care about support for) + const pg_pool_t *write_tier = osd_map.get_pg_pool( + pool->write_tier); + ceph_assert(write_tier != NULL); // OSDMonitor shouldn't allow DNE tier + if (write_tier->cache_mode == pg_pool_t::CACHEMODE_FORWARD + || write_tier->cache_mode == pg_pool_t::CACHEMODE_READONLY) { + *ss << "EC pool '" << pool_name << "' has a write tier (" + << osd_map.get_pool_name(pool->write_tier) + << ") that is configured " + "to forward writes. Use a cache mode such as 'writeback' for " + "CephFS"; + return -EINVAL; + } } } diff --git a/src/mon/FSCommands.h b/src/mon/FSCommands.h index 69662d2b29e5..66e0286a465f 100644 --- a/src/mon/FSCommands.h +++ b/src/mon/FSCommands.h @@ -30,6 +30,11 @@ class FileSystemCommandHandler : protected CommandHandler protected: std::string prefix; + enum { + POOL_METADATA, + POOL_DATA_DEFAULT, + POOL_DATA_EXTRA, + }; /** * Return 0 if the pool is suitable for use with CephFS, or * in case of errors return a negative error code, and populate @@ -40,7 +45,7 @@ protected: int _check_pool( OSDMap &osd_map, const int64_t pool_id, - bool metadata, + int type, bool force, std::stringstream *ss) const;