From f581731eaeb0f0c625f62928bf3724f8e705132c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 1 Sep 2017 14:45:12 -0400 Subject: [PATCH] mon/OSDMonitor: prevent pg_num from exceeding mon_pg_warn_max_per_osd Check total pg count for the cluster vs osd count and max pgs per osd before allowing pool creation, pg_num change, or pool size change. "in" OSDs are the ones we distribute data to, so this should be the right count to use. (Whether they happen to be up or down at the moment is incidental.) If the user really wants to create the pool, they can change the configurable limit. Signed-off-by: Sage Weil (cherry picked from commit 3ea2e518d27e6c06182c2cb3d9c0b9a0dab8dd22) --- PendingReleaseNotes | 11 +++++++++++ src/mon/OSDMonitor.cc | 42 ++++++++++++++++++++++++++++++++++++++++++ src/mon/OSDMonitor.h | 1 + 3 files changed, 54 insertions(+) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 9ca48cdabe968..4ab5301cc22fb 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -27,3 +27,14 @@ limit (5% by default). Limits by inode count are still supported using mds_cache_size. Setting mds_cache_size to 0 (the default) disables the inode limit. + +* The maximum number of PGs per OSD before the monitor issues a + warning has been reduced from 300 to 200 PGs. 200 is still twice + the generally recommended target of 100 PGs per OSD. This limit can + be adjusted via the ``mon_pg_warn_max_per_osd`` option on the + monitors. + +* Creating pools or adjusting pg_num will now fail if the change would + make the number of PGs per OSD exceed the configured + ``mon_pg_warn_max_per_osd`` limit. The option can be adjusted if it + is really necessary to create a pool with more PGs. 
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 46f702f4023e4..a047b26f27ccf 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -5797,6 +5797,57 @@ int OSDMonitor::get_crush_rule(const string &rule_name,
   return 0;
 }
 
+/**
+ * Check that a proposed pg_num/size for one pool keeps the cluster-wide
+ * PG total under the configured limit.
+ *
+ * The limit is mon_pg_warn_max_per_osd times the number of "in" OSDs.
+ * The projected total sums pg_num * size over every pool in osdmap,
+ * using the proposed values for @p pool (or adding them on top when
+ * @p pool is negative, i.e. not in the map yet).
+ *
+ * @param pool The id of the pool being changed, or negative for a new pool
+ * @param pg_num The proposed pg_num for that pool
+ * @param size The proposed size for that pool
+ * @param ss Stream that receives the explanation when the check fails
+ * @return 0 if the projected total fits, -ERANGE if it exceeds the limit
+ */
+int OSDMonitor::check_pg_num(int64_t pool, int pg_num, int size, ostream *ss)
+{
+  const int64_t max_pgs_per_osd = g_conf->mon_pg_warn_max_per_osd;
+  const int64_t num_in_osds = osdmap.get_num_in_osds();
+  // NOTE(review): with zero "in" OSDs max_pgs is 0, so any request with a
+  // positive pg_num * size is refused -- confirm this is intended.
+  const int64_t max_pgs = max_pgs_per_osd * num_in_osds;
+  // Widen before multiplying so the product is computed in 64 bits;
+  // int * int here would otherwise overflow before being added to an
+  // int64_t (likewise for the per-pool product below).
+  const int64_t proposed = static_cast<int64_t>(pg_num) * size;
+  int64_t projected = 0;
+  if (pool < 0) {
+    projected += proposed;
+  }
+  for (const auto& i : osdmap.get_pools()) {
+    if (i.first == pool) {
+      projected += proposed;
+    } else {
+      projected += static_cast<int64_t>(i.second.get_pg_num()) *
+        i.second.get_size();
+    }
+  }
+  if (projected > max_pgs) {
+    if (pool >= 0) {
+      *ss << "pool id " << pool;
+    }
+    *ss << " pg_num " << pg_num << " size " << size
+        << " would mean " << projected
+        << " total pgs, which exceeds max " << max_pgs
+        << " (mon_pg_warn_max_per_osd " << max_pgs_per_osd
+        << " * num_in_osds " << num_in_osds << ")";
+    return -ERANGE;
+  }
+  return 0;
+}
+
 /**
  * @param name The name of the new pool
  * @param auid The auid of the pool owner. 
Can be -1 @@ -5876,6 +5905,11 @@ int OSDMonitor::prepare_new_pool(string& name, uint64_t auid, dout(10) << " prepare_pool_size returns " << r << dendl; return r; } + r = check_pg_num(-1, pg_num, size, ss); + if (r) { + dout(10) << " prepare_pool_size returns " << r << dendl; + return r; + } if (!osdmap.crush->check_crush_rule(crush_rule, pool_type, size, *ss)) { return -EINVAL; @@ -6052,6 +6086,10 @@ int OSDMonitor::prepare_command_pool_set(map &cmdmap, ss << "pool size must be between 1 and 10"; return -EINVAL; } + int r = check_pg_num(pool, p.get_pg_num(), n, &ss); + if (r < 0) { + return r; + } p.size = n; if (n < p.min_size) p.min_size = n; @@ -6121,6 +6159,10 @@ int OSDMonitor::prepare_command_pool_set(map &cmdmap, << " (you may adjust 'mon max pool pg num' for higher values)"; return -ERANGE; } + int r = check_pg_num(pool, n, p.get_size(), &ss); + if (r) { + return r; + } string force; cmd_getval(g_ceph_context,cmdmap, "force", force); if (p.cache_mode != pg_pool_t::CACHEMODE_NONE && diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index baee6a894d1d9..40853116969fc 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -346,6 +346,7 @@ private: const string &erasure_code_profile, unsigned *stripe_width, ostream *ss); + int check_pg_num(int64_t pool, int pg_num, int size, ostream* ss); int prepare_new_pool(string& name, uint64_t auid, int crush_rule, const string &crush_rule_name, -- 2.39.5