From: Kamoltat Date: Tue, 13 Apr 2021 21:12:40 +0000 (+0000) Subject: pybind/mgr/pg_autoscaler: Added autoscale-profile feature X-Git-Tag: v17.1.0~1913^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=0e49683b538026e4c51a040950da3dbb9c7e92d5;p=ceph.git pybind/mgr/pg_autoscaler: Added autoscale-profile feature The autoscaler by default will start out each pool with minimal pgs and `scale-up` the pgs when there is more usage in each pool. Users can now use the commands: `osd pool set autoscale-profile scale-down` to make the pools start out with a full complement of pgs and only `scale-down` when the usage ratio across the pools is not even. `osd pool set autoscale-profile scale-up` (the default) to make the pools start out with minimal pgs and `scale-up` the pgs when there is more usage in each pool. Edited KVMonitor.cc file to make the `autoscale_profile` variable persistent. Edited tests/test_cal_final_pg_target.py so that it takes into account the new `profile` argument when calling _calc_final_pg_target(). 
Also, added some new test cases for when profile is `scale-up` Renamed tests/test_autoscaler.py to a more appropriate name: tests/test_cal_ratio.py Signed-off-by: Kamoltat --- diff --git a/src/mon/KVMonitor.cc b/src/mon/KVMonitor.cc index 699cbe4170395..4d24d727bbe0a 100644 --- a/src/mon/KVMonitor.cc +++ b/src/mon/KVMonitor.cc @@ -47,6 +47,9 @@ void KVMonitor::create_initial() dout(10) << __func__ << dendl; version = 0; pending.clear(); + bufferlist bl; + bl.append("scale-down"); + pending["config/mgr/mgr/pg_autoscaler/autoscale_profile"] = bl; } void KVMonitor::update_from_paxos(bool *need_bootstrap) diff --git a/src/pybind/mgr/pg_autoscaler/module.py b/src/pybind/mgr/pg_autoscaler/module.py index 0c5bfd2523363..e2948ea57a2d5 100644 --- a/src/pybind/mgr/pg_autoscaler/module.py +++ b/src/pybind/mgr/pg_autoscaler/module.py @@ -7,7 +7,7 @@ import mgr_util import threading import uuid from prettytable import PrettyTable -from mgr_module import MgrModule +from mgr_module import MgrModule, Option """ Some terminology is made up for the purposes of this module: @@ -56,7 +56,6 @@ def effective_target_ratio(target_ratio, total_target_ratio, total_target_bytes, return target_ratio - class PgAdjustmentProgress(object): """ Keeps the initial and target pg_num values @@ -87,8 +86,20 @@ class PgAutoscaler(MgrModule): { "cmd": "osd pool autoscale-status", "desc": "report on pool pg_num sizing recommendation and intent", - "perm": "r" + "perm": "r", + }, + { + "cmd": "osd pool set autoscale-profile scale-up", + "desc": ("set the autoscaler behavior to start out with minimum pgs" + "and scales up when there is pressure"), + "perm": "rw", }, + { + "cmd": "osd pool set autoscale-profile scale-down", + "desc": ("set the autoscaler behavior to start out with full pgs and" + "scales down when there is pressure"), + "perm": "rw", + } ] NATIVE_OPTIONS = [ @@ -101,6 +112,17 @@ class PgAutoscaler(MgrModule): 'name': 'sleep_interval', 'default': str(60), }, + Option( + 'autoscale_profile', 
+ default='scale-up', + type='str', + desc='pg_autoscale profiler', + long_desc=('Determines the behavior of the autoscaler algorithm ' + '`scale-up` means that it starts out with minmum pgs ', + 'and scales up when there is pressure, `scale-down` ', + 'means starts out with full pgs and scales down when ', + 'there is pressure '), + runtime=True), ] def __init__(self, *args, **kwargs): @@ -124,11 +146,28 @@ class PgAutoscaler(MgrModule): self.get_module_option(opt['name'])) self.log.debug(' mgr option %s = %s', opt['name'], getattr(self, opt['name'])) - + # if the profiler option is not set, this means it is an old cluster + autoscale_profile = self.get_module_option("autoscale_profile") + if not autoscale_profile: + self.set_module_option("autoscale_profile", "scale-up") def handle_command(self, inbuf, cmd): if cmd['prefix'] == "osd pool autoscale-status": retval = self._command_autoscale_status(cmd) + + elif cmd['prefix'] == "osd pool set autoscale-profile scale-up": + if self.autoscale_profile == "scale-up": + retval = 0, "", "autoscale-profile is already a scale-up!" + else: + self.set_module_option("autoscale_profile", "scale-up") + retval = 0, "", "autoscale-profile is now scale-up" + + elif cmd['prefix'] == "osd pool set autoscale-profile scale-down": + if self.autoscale_profile == "scale-down": + retval = 0, "", "autoscale-profile is already a scale-down!" 
+ else: + self.set_module_option("autoscale_profile", "scale-down") + retval = 0, "", "autoscale-profile is now scale-down" else: assert False # ceph-mgr should never pass us unknown cmds return retval @@ -136,7 +175,8 @@ class PgAutoscaler(MgrModule): def _command_autoscale_status(self, cmd): osdmap = self.get_osdmap() pools = osdmap.get_pools_by_name() - ps, root_map, pool_root = self._get_pool_status(osdmap, pools) + profile = self.autoscale_profile + ps, root_map, pool_root = self._get_pool_status(osdmap, pools, profile) if cmd.get('format') == 'json' or cmd.get('format') == 'json-pretty': return 0, json.dumps(ps, indent=4, sort_keys=True), '' @@ -294,57 +334,67 @@ class PgAutoscaler(MgrModule): return result, pool_root def _calc_final_pg_target( - self, - p, - pool_name, + self, + p, + pool_name, root_map, root_id, - capacity_ratio, - even_pools, - bias, + capacity_ratio, + even_pools, + bias, is_used, + profile, ): """ - is_used flag used to determine if this is the first + `profile` determines behaviour of the autoscaler. + `is_used` flag used to determine if this is the first pass where the caller tries to calculate/adjust pools that has used_ratio > even_ratio else this is the second pass, we calculate final_ratio by giving it 1 / pool_count - of the root we are looking. + of the root we are currently looking at. """ - if is_used: - even_ratio = 1 / root_map[root_id].pool_count - used_ratio = capacity_ratio + if profile == "scale-up": + final_ratio = capacity_ratio + # So what proportion of pg allowance should we be using? 
+ pool_pg_target = (final_ratio * root_map[root_id].pg_target) / p['size'] * bias + final_pg_target = max(p.get('options', {}).get('pg_num_min', PG_NUM_MIN), + nearest_power_of_two(pool_pg_target)) - if used_ratio > even_ratio: - root_map[root_id].pool_used += 1 - else: - # keep track of even_pools to be used in second pass - # of the caller function - even_pools[pool_name] = p - return None, None, None + else: + if is_used: + even_ratio = 1 / root_map[root_id].pool_count + used_ratio = capacity_ratio - final_ratio = max(used_ratio, even_ratio) - used_pg = final_ratio * root_map[root_id].pg_target - root_map[root_id].pg_left -= used_pg - pool_pg_target = used_pg / p['size'] * bias + if used_ratio > even_ratio: + root_map[root_id].pool_used += 1 + else: + # keep track of even_pools to be used in second pass + # of the caller function + even_pools[pool_name] = p + return None, None, None - else: - final_ratio = 1 / (root_map[root_id].pool_count - root_map[root_id].pool_used) - pool_pg_target = (final_ratio * root_map[root_id].pg_left) / p['size'] * bias - - final_pg_target = max(p.get('options', {}).get('pg_num_min', PG_NUM_MIN), - nearest_power_of_two(pool_pg_target)) - - self.log.info("Pool '{0}' root_id {1} using {2} of space, bias {3}, " - "pg target {4} quantized to {5} (current {6})".format( - p['pool_name'], - root_id, - capacity_ratio, - bias, - pool_pg_target, - final_pg_target, - p['pg_num_target'] - )) + final_ratio = max(used_ratio, even_ratio) + used_pg = final_ratio * root_map[root_id].pg_target + root_map[root_id].pg_left -= used_pg + pool_pg_target = used_pg / p['size'] * bias + + else: + final_ratio = 1 / (root_map[root_id].pool_count - root_map[root_id].pool_used) + pool_pg_target = (final_ratio * root_map[root_id].pg_left) / p['size'] * bias + + final_pg_target = max(p.get('options', {}).get('pg_num_min', PG_NUM_MIN), + nearest_power_of_two(pool_pg_target)) + + self.log.info("Pool '{0}' root_id {1} using {2} of space, bias {3}, " + "pg target {4} 
quantized to {5} (current {6})".format( + p['pool_name'], + root_id, + capacity_ratio, + bias, + pool_pg_target, + final_pg_target, + p['pg_num_target'] + )) return final_ratio, pool_pg_target, final_pg_target @@ -359,11 +409,14 @@ class PgAutoscaler(MgrModule): ret, threshold, is_used, + profile, ): """ Calculates final_pg_target of each pools and determine if it needs - scaling by starting out with a full complement of pgs and only - descreasing it when other pools need more due to increased usage. + scaling, this depends on the profile of the autoscaler. For scale-down, + we start out with a full complement of pgs and only descrease it when other + pools needs more pgs due to increased usage. For scale-up, we start out with + the minimal amount of pgs and only scale when there is increase in usage. """ even_pools = {} for pool_name, p in pools.items(): @@ -413,7 +466,9 @@ class PgAutoscaler(MgrModule): capacity_ratio = max(capacity_ratio, target_ratio) final_ratio, pool_pg_target, final_pg_target = self._calc_final_pg_target(p, - pool_name, root_map, root_id, capacity_ratio, even_pools, bias, is_used) + pool_name, root_map, root_id, capacity_ratio, even_pools, bias, is_used, + profile, + ) if final_ratio == None: continue @@ -449,11 +504,11 @@ class PgAutoscaler(MgrModule): return ret, even_pools - def _get_pool_status( self, osdmap, pools, + profile, threshold=3.0, ): assert threshold >= 2.0 @@ -464,17 +519,21 @@ class PgAutoscaler(MgrModule): pool_stats = dict([(p['id'], p['stats']) for p in df['pools']]) ret = [] + # Iterate over all pools to determine how they should be sized. - # First call is to find/adjust pools that uses more capacaity than + # First call of _calc_pool_targets() is to find/adjust pools that uses more capacaity than # the even_ratio of other pools and we adjust those first. # Second call make use of the even_pools we keep track of in the first call. # All we need to do is iterate over those and give them 1/pool_count of the # total pgs. 
- ret, even_pools = self._calc_pool_targets(osdmap, pools, crush_map, root_map, pool_root, - pool_stats, ret, threshold, True) - - ret, _ = self._calc_pool_targets(osdmap, even_pools, crush_map, root_map, pool_root, - pool_stats, ret, threshold, False) + + ret, even_pools = self._calc_pool_targets(osdmap, pools, crush_map, root_map, pool_root, + pool_stats, ret, threshold, True, profile) + + if profile == "scale-down": + # We only have adjust even_pools when we use scale-down profile + ret, _ = self._calc_pool_targets(osdmap, even_pools, crush_map, root_map, pool_root, + pool_stats, ret, threshold, False, profile) return (ret, root_map, pool_root) @@ -497,7 +556,8 @@ class PgAutoscaler(MgrModule): if osdmap.get_require_osd_release() < 'nautilus': return pools = osdmap.get_pools_by_name() - ps, root_map, pool_root = self._get_pool_status(osdmap, pools) + profile = self.autoscale_profile + ps, root_map, pool_root = self._get_pool_status(osdmap, pools, profile) # Anyone in 'warn', set the health message for them and then # drop them from consideration. 
diff --git a/src/pybind/mgr/pg_autoscaler/tests/test_autoscaler.py b/src/pybind/mgr/pg_autoscaler/tests/test_autoscaler.py deleted file mode 100644 index 122d95274d071..0000000000000 --- a/src/pybind/mgr/pg_autoscaler/tests/test_autoscaler.py +++ /dev/null @@ -1,34 +0,0 @@ -from pg_autoscaler import effective_target_ratio -from pytest import approx - -def check_simple_ratio(target_ratio, tot_ratio): - etr = effective_target_ratio(target_ratio, tot_ratio, 0, 0) - assert (target_ratio / tot_ratio) == approx(etr) - return etr - -def test_simple(): - etr1 = check_simple_ratio(0.2, 0.9) - etr2 = check_simple_ratio(2, 9) - etr3 = check_simple_ratio(20, 90) - assert etr1 == approx(etr2) - assert etr1 == approx(etr3) - - etr = check_simple_ratio(0.9, 0.9) - assert etr == approx(1.0) - etr1 = check_simple_ratio(1, 2) - etr2 = check_simple_ratio(0.5, 1.0) - assert etr1 == approx(etr2) - -def test_total_bytes(): - etr = effective_target_ratio(1, 10, 5, 10) - assert etr == approx(0.05) - etr = effective_target_ratio(0.1, 1, 5, 10) - assert etr == approx(0.05) - etr = effective_target_ratio(1, 1, 5, 10) - assert etr == approx(0.5) - etr = effective_target_ratio(1, 1, 0, 10) - assert etr == approx(1.0) - etr = effective_target_ratio(0, 1, 5, 10) - assert etr == approx(0.0) - etr = effective_target_ratio(1, 1, 10, 10) - assert etr == approx(0.0) diff --git a/src/pybind/mgr/pg_autoscaler/tests/test_cal_final_pg_target.py b/src/pybind/mgr/pg_autoscaler/tests/test_cal_final_pg_target.py index 78960eca652c2..35269d5971759 100644 --- a/src/pybind/mgr/pg_autoscaler/tests/test_cal_final_pg_target.py +++ b/src/pybind/mgr/pg_autoscaler/tests/test_cal_final_pg_target.py @@ -17,30 +17,107 @@ class RootMapItem: class TestPgAutoscaler(object): def setup(self): - # a bunch of attributes for testing + # a bunch of attributes for testing. 
self.autoscaler = module.PgAutoscaler('module_name', 0, 0) - def helper_test(self, pools, root_map, bias): - + def helper_test(self, pools, root_map, bias, profile): + # Here we simulate how _calc_pool_target() works. even_pools = {} for pool_name, p in pools.items(): - final_ratio, pool_pg_target, final_pg_target = self.autoscaler._calc_final_pg_target(p, pool_name, root_map, p['root_id'], p['capacity_ratio'], even_pools, bias, True) - + final_ratio, pool_pg_target, final_pg_target = self.autoscaler._calc_final_pg_target(p, pool_name, root_map, + p['root_id'], p['capacity_ratio'], even_pools, bias, True, profile) + if final_ratio == None: + # no final_ratio means current pool is an even pool + # and we do not have to do any assertion on it. + # You will never hit this case with a scale up profile. continue assert p['expected_final_pg_target'] == final_pg_target assert p['expected_final_ratio'] == final_ratio - assert not p['even_pools'] and pool_name not in even_pools - for pool_name, p in even_pools.items(): - final_ratio, pool_pg_target, final_pg_target = self.autoscaler._calc_final_pg_target(p, pool_name, root_map, p['root_id'], p['capacity_ratio'], even_pools, bias, False) + if profile == "scale-down": + # We only care about even_pools when profile is a scale-down + assert not p['even_pools'] and pool_name not in even_pools - assert p['expected_final_pg_target'] == final_pg_target - assert p['expected_final_ratio'] == final_ratio - assert p['even_pools'] and pool_name in even_pools + if profile == "scale-down": + for pool_name, p in even_pools.items(): + final_ratio, pool_pg_target, final_pg_target = self.autoscaler._calc_final_pg_target(p, pool_name, root_map, + p['root_id'], p['capacity_ratio'], even_pools, bias, False, profile) - def test_all_even_pools(self): + assert p['expected_final_pg_target'] == final_pg_target + assert p['expected_final_ratio'] == final_ratio + assert p['even_pools'] and pool_name in even_pools + + def 
test_all_even_pools_scale_up(self): + pools = { + + "test0":{ + + "pool": 0, + "pool_name": "test0", + "pg_num_target": 32, + "capacity_ratio": 0.2, + "root_id":"0", + "expected_final_pg_target": 64, + "expected_final_ratio": 0.2, + "even_pools": True, + "size": 1, + }, + + "test1":{ + + "pool": 1, + "pool_name": "test1", + "pg_num_target": 32, + "capacity_ratio": 0.2, + "root_id":"0", + "expected_final_pg_target": 64, + "expected_final_ratio": 0.2, + "even_pools": True, + "size": 1, + }, + + "test2":{ + + "pool": 2, + "pool_name": "test2", + "pg_num_target": 32, + "capacity_ratio": 0.2, + "root_id":"0", + "expected_final_pg_target": 64, + "expected_final_ratio": 0.2, + "even_pools": True, + "size": 1, + }, + + "test3":{ + + "pool": 3, + "pool_name": "test3", + "pg_num_target": 32, + "capacity_ratio": 0.1, + "root_id": "0", + "expected_final_pg_target": 32, + "expected_final_ratio": 0.1, + "even_pools": True, + "size": 1, + }, + + } + + root_map = { + + "0": RootMapItem(4, 400, 400), + "1": RootMapItem(4, 400, 400), + + } + + profile = "scale-up" + bias = 1 + self.helper_test(pools, root_map, bias, profile) + + def test_all_even_pools_scale_down(self): pools = { "test0":{ @@ -104,10 +181,79 @@ class TestPgAutoscaler(object): } + profile = "scale-down" bias = 1 - self.helper_test(pools, root_map, bias) + self.helper_test(pools, root_map, bias, profile) - def test_uneven_pools(self): + def test_uneven_pools_scale_up(self): + pools = { + + "test0":{ + + "pool": 0, + "pool_name": "test0", + "pg_num_target": 32, + "capacity_ratio": 0.1, + "root_id":"0", + "expected_final_pg_target": 32, + "expected_final_ratio": 0.1, + "even_pools": True, + "size": 1, + }, + + "test1":{ + + "pool": 1, + "pool_name": "test1", + "pg_num_target": 32, + "capacity_ratio": 0.5, + "root_id":"0", + "expected_final_pg_target": 256, + "expected_final_ratio": 0.5, + "even_pools": False, + "size": 1, + }, + + "test2":{ + + "pool": 2, + "pool_name": "test2", + "pg_num_target": 32, + 
"capacity_ratio": 0.1, + "root_id":"0", + "expected_final_pg_target": 32, + "expected_final_ratio": 0.1, + "even_pools": True, + "size": 1, + }, + + "test3":{ + + "pool": 3, + "pool_name": "test3", + "pg_num_target": 32, + "capacity_ratio": 0.1, + "root_id": "0", + "expected_final_pg_target": 32, + "expected_final_ratio": 0.1, + "even_pools": True, + "size": 1, + }, + + } + + root_map = { + + "0": RootMapItem(4, 400, 400), + "1": RootMapItem(4, 400, 400), + + } + + profile = "scale-up" + bias = 1 + self.helper_test(pools, root_map, bias, profile) + + def test_uneven_pools_scale_down(self): pools = { "test0":{ @@ -171,10 +317,92 @@ class TestPgAutoscaler(object): } + profile = "scale-down" + bias = 1 + self.helper_test(pools, root_map, bias, profile) + + def test_uneven_pools_with_diff_roots_scale_up(self): + pools = { + + "test0":{ + + "pool": 0, + "pool_name": "test0", + "pg_num_target": 32, + "capacity_ratio": 0.4, + "root_id":"0", + "expected_final_pg_target": 2048, + "expected_final_ratio": 0.4, + "even_pools": False, + "size": 1, + }, + + "test1":{ + + "pool": 1, + "pool_name": "test1", + "pg_num_target": 32, + "capacity_ratio": 0.6, + "root_id":"1", + "expected_final_pg_target": 2048, + "expected_final_ratio": 0.6, + "even_pools": False, + "size": 1, + }, + + "test2":{ + + "pool": 2, + "pool_name": "test2", + "pg_num_target": 32, + "capacity_ratio": 0.5, + "root_id":"0", + "expected_final_pg_target": 2048, + "expected_final_ratio": 0.5, + "even_pools": False, + "size": 1, + }, + + "test3":{ + + "pool": 3, + "pool_name": "test3", + "pg_num_target": 32, + "capacity_ratio": 0.1, + "root_id": "0", + "expected_final_pg_target": 512, + "expected_final_ratio": 0.1, + "even_pools": True, + "size": 1, + }, + + "test4":{ + + "pool": 4, + "pool_name": "test4", + "pg_num_target": 32, + "capacity_ratio": 0.4, + "root_id": "1", + "expected_final_pg_target": 2048, + "expected_final_ratio": 0.4, + "even_pools": True, + "size": 1, + }, + + } + + root_map = { + + "0": 
RootMapItem(3, 5000, 5000), + "1": RootMapItem(2, 5000, 5000), + + } + + profile = "scale-up" bias = 1 - self.helper_test(pools, root_map, bias) + self.helper_test(pools, root_map, bias, profile) - def test_uneven_pools_with_diff_roots(self): + def test_uneven_pools_with_diff_roots_scale_down(self): pools = { "test0":{ @@ -251,5 +479,6 @@ class TestPgAutoscaler(object): } + profile = "scale-down" bias = 1 - self.helper_test(pools, root_map, bias) + self.helper_test(pools, root_map, bias, profile) diff --git a/src/pybind/mgr/pg_autoscaler/tests/test_cal_ratio.py b/src/pybind/mgr/pg_autoscaler/tests/test_cal_ratio.py new file mode 100644 index 0000000000000..122d95274d071 --- /dev/null +++ b/src/pybind/mgr/pg_autoscaler/tests/test_cal_ratio.py @@ -0,0 +1,34 @@ +from pg_autoscaler import effective_target_ratio +from pytest import approx + +def check_simple_ratio(target_ratio, tot_ratio): + etr = effective_target_ratio(target_ratio, tot_ratio, 0, 0) + assert (target_ratio / tot_ratio) == approx(etr) + return etr + +def test_simple(): + etr1 = check_simple_ratio(0.2, 0.9) + etr2 = check_simple_ratio(2, 9) + etr3 = check_simple_ratio(20, 90) + assert etr1 == approx(etr2) + assert etr1 == approx(etr3) + + etr = check_simple_ratio(0.9, 0.9) + assert etr == approx(1.0) + etr1 = check_simple_ratio(1, 2) + etr2 = check_simple_ratio(0.5, 1.0) + assert etr1 == approx(etr2) + +def test_total_bytes(): + etr = effective_target_ratio(1, 10, 5, 10) + assert etr == approx(0.05) + etr = effective_target_ratio(0.1, 1, 5, 10) + assert etr == approx(0.05) + etr = effective_target_ratio(1, 1, 5, 10) + assert etr == approx(0.5) + etr = effective_target_ratio(1, 1, 0, 10) + assert etr == approx(1.0) + etr = effective_target_ratio(0, 1, 5, 10) + assert etr == approx(0.0) + etr = effective_target_ratio(1, 1, 10, 10) + assert etr == approx(0.0)