From 9d5d931c60104823b3b20dcfb09480d65ffaa5ed Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Tue, 3 Dec 2013 10:57:09 -0800 Subject: [PATCH] OSDMonitor: use a different approach to prevent extreme multipliers on PG splits Signed-off-by: Greg Farnum Reviewed-by: Sage Weil (cherry picked from commit d8ccd73968fbd0753ca08916ebf1062cdb4d5ac1) Conflicts: src/mon/OSDMonitor.cc --- src/common/config_opts.h | 2 +- src/mon/OSDMonitor.cc | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index f8333819568..90f85c2baf9 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -147,7 +147,7 @@ OPTION(mon_osd_down_out_subtree_limit, OPT_STR, "rack") // smallest crush unit OPTION(mon_osd_min_up_ratio, OPT_DOUBLE, .3) // min osds required to be up to mark things down OPTION(mon_osd_min_in_ratio, OPT_DOUBLE, .3) // min osds required to be in to mark things out OPTION(mon_osd_max_op_age, OPT_DOUBLE, 32) // max op age before we get concerned (make it a power of 2) -OPTION(mon_osd_max_split_ratio, OPT_INT, 8) // largest multiple allowed when doing PG split +OPTION(mon_osd_max_split_count, OPT_INT, 32) // largest number of PGs per "involved" OSD to let split create OPTION(mon_stat_smooth_intervals, OPT_INT, 2) // smooth stats over last N PGMap maps OPTION(mon_lease, OPT_FLOAT, 5) // lease interval OPTION(mon_lease_renew_interval, OPT_FLOAT, 3) // on leader, to renew the lease diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index e6d6983ec96..d98d37192b0 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -3608,11 +3608,16 @@ done: pending_inc.new_pools[pool].crash_replay_interval = n; ss << "set pool " << pool << " to crash_replay_interval to " << n; } else if (var == "pg_num") { + int expected_osds = MIN(p->get_pg_num(), osdmap.get_num_osds()); + int64_t new_pgs = n - p->get_pg_num(); + int64_t pgs_per_osd = expected_osds > 0 ? new_pgs / expected_osds : new_pgs; // expected_osds is 0 when the map has no OSDs; never divide by it if (n <= p->get_pg_num()) { ss 
<< "specified pg_num " << n << " <= current " << p->get_pg_num(); - } else if (n > (int)p->get_pg_num() * g_conf->mon_osd_max_split_ratio) { - ss << "specified pg_num " << n << " is too large (> current " - << p->get_pg_num() << '*' << g_conf->mon_osd_max_split_ratio << ')'; + } else if (pgs_per_osd > g_conf->mon_osd_max_split_count) { + ss << "specified pg_num " << n << " is too large (creating " + << new_pgs << " new PGs on ~" << expected_osds + << " OSDs exceeds per-OSD max of " << g_conf->mon_osd_max_split_count + << ')'; err = -E2BIG; } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) { ss << "currently creating pgs, wait"; -- 2.47.3