From: Greg Farnum
Date: Tue, 3 Dec 2013 18:57:09 +0000 (-0800)
Subject: OSDMonitor: use a different approach to prevent extreme multipliers on PG splits
X-Git-Tag: v0.73~1^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d8ccd73968fbd0753ca08916ebf1062cdb4d5ac1;p=ceph.git

OSDMonitor: use a different approach to prevent extreme multipliers on PG splits

Signed-off-by: Greg Farnum
---

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 4d8c2bb02b0..18b2f65d0e9 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -148,7 +148,7 @@ OPTION(mon_osd_down_out_subtree_limit, OPT_STR, "rack") // smallest crush unit
 OPTION(mon_osd_min_up_ratio, OPT_DOUBLE, .3) // min osds required to be up to mark things down
 OPTION(mon_osd_min_in_ratio, OPT_DOUBLE, .3) // min osds required to be in to mark things out
 OPTION(mon_osd_max_op_age, OPT_DOUBLE, 32) // max op age before we get concerned (make it a power of 2)
-OPTION(mon_osd_max_split_ratio, OPT_INT, 8) // largest multiple allowed when doing PG split
+OPTION(mon_osd_max_split_count, OPT_INT, 32) // largest number of PGs per "involved" OSD to let split create
 OPTION(mon_stat_smooth_intervals, OPT_INT, 2) // smooth stats over last N PGMap maps
 OPTION(mon_lease, OPT_FLOAT, 5) // lease interval
 OPTION(mon_lease_renew_interval, OPT_FLOAT, 3) // on leader, to renew the lease
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index fd7ed80adc1..9bd516ec995 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2806,11 +2806,17 @@ int OSDMonitor::prepare_command_pool_set(map &cmdmap,
         return -EEXIST;
       else
         return 0;
-    } else if (n > (int)p.get_pg_num() * g_conf->mon_osd_max_split_ratio) {
-      ss << "specified pg_num " << n << " is too large (> current "
-         << p.get_pg_num() << '*' << g_conf->mon_osd_max_split_ratio << ')';
-      return -E2BIG;
     } else {
+      int expected_osds = MIN(p.get_pg_num(), osdmap.get_num_osds());
+      int64_t new_pgs = n - p.get_pg_num();
+      int64_t pgs_per_osd = new_pgs / expected_osds;
+      if (pgs_per_osd > g_conf->mon_osd_max_split_count) {
+        ss << "specified pg_num " << n << " is too large (creating "
+           << new_pgs << " new PGs on ~" << expected_osds
+           << " OSDs exceeds per-OSD max of" << g_conf->mon_osd_max_split_count
+           << ')';
+        return -E2BIG;
+      }
       for(set::iterator i = mon->pgmon()->pg_map.creating_pgs.begin();
           i != mon->pgmon()->pg_map.creating_pgs.end();
           ++i) {