From 5f0f967c9337385fcf50548c36b114f809477b58 Mon Sep 17 00:00:00 2001
From: Kamoltat
Date: Tue, 7 Dec 2021 21:15:36 +0000
Subject: [PATCH] mon: osd pool create with --bulk flag

Creating a pool with `--bulk` allows the pg_autoscaler to use
`scale-down` mode on it.

Creating pool: `ceph osd pool create <pool-name> --bulk`
Get var:       `ceph osd pool get <pool-name> bulk`
Set var:       `ceph osd pool set <pool-name> bulk <true|false|1|0>`

Removed `autoscale_profile` and incorporated the bulk flag into
calculating `final_pg_target` for each pool.

`bin/ceph osd pool autoscale-status` no longer has a `PROFILE` column
but has a `BULK` column instead.

Signed-off-by: Kamoltat
---
 doc/rados/operations/pools.rst         |   9 ++
 src/common/options/global.yaml.in      |  11 +-
 src/mon/KVMonitor.cc                   |   3 -
 src/mon/MonCommands.h                  |   5 +-
 src/mon/OSDMonitor.cc                  |  18 ++-
 src/mon/OSDMonitor.h                   |   1 +
 src/osd/OSDMap.cc                      |   2 +
 src/osd/osd_types.h                    |   4 +
 src/pybind/mgr/pg_autoscaler/module.py | 204 +++++++++++--------------
 9 files changed, 134 insertions(+), 123 deletions(-)

diff --git a/doc/rados/operations/pools.rst b/doc/rados/operations/pools.rst
index 82cfaddd82901..3b6d227aad741 100644
--- a/doc/rados/operations/pools.rst
+++ b/doc/rados/operations/pools.rst
@@ -420,6 +420,15 @@ You may set values for the following keys:
    :Valid Range: 1 sets flag, 0 unsets flag
    :Version: Version ``FIXME``

+.. _bulk:
+
+.. describe:: bulk
+
+   Set or unset the bulk flag on a given pool.
+
+   :Type: Boolean
+   :Valid Range: true/1 sets flag, false/0 unsets flag
+
 .. _write_fadvise_dontneed:

 .. describe:: write_fadvise_dontneed
diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in
index 2a00edfe3e8ef..80e7b9dc34a5c 100644
--- a/src/common/options/global.yaml.in
+++ b/src/common/options/global.yaml.in
@@ -2566,6 +2566,15 @@ options:
   services:
   - mon
   with_legacy: true
+- name: osd_pool_default_flag_bulk
+  type: bool
+  level: advanced
+  desc: set bulk flag on new pools
+  fmt_desc: Set the ``bulk`` flag on new pools, allowing the autoscaler to use scale-down mode.
+ default: false + services: + - mon + with_legacy: true - name: osd_pool_default_hit_set_bloom_fpp type: float level: advanced @@ -6096,4 +6105,4 @@ options: services: - rgw - osd - with_legacy: true \ No newline at end of file + with_legacy: true diff --git a/src/mon/KVMonitor.cc b/src/mon/KVMonitor.cc index a919a29eed20a..37a81a8048d4e 100644 --- a/src/mon/KVMonitor.cc +++ b/src/mon/KVMonitor.cc @@ -53,9 +53,6 @@ void KVMonitor::create_initial() dout(10) << __func__ << dendl; version = 0; pending.clear(); - bufferlist bl; - bl.append("scale-up"); - pending["config/mgr/mgr/pg_autoscaler/autoscale_profile"] = bl; } void KVMonitor::update_from_paxos(bool *need_bootstrap) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index f4f4fef9804e7..0a8ce0599df8c 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1058,6 +1058,7 @@ COMMAND("osd pool create " "name=size,type=CephInt,range=0,req=false " "name=pg_num_min,type=CephInt,range=0,req=false " "name=autoscale_mode,type=CephChoices,strings=on|off|warn,req=false " + "name=bulk,type=CephBool,req=false " "name=target_size_bytes,type=CephInt,range=0,req=false " "name=target_size_ratio,type=CephFloat,range=0|1,req=false",\ "create pool", "osd", "rw") @@ -1082,11 +1083,11 @@ COMMAND("osd pool rename " "rename to ", "osd", "rw") COMMAND("osd pool get " "name=pool,type=CephPoolname " - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio", + "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk", "get pool parameter ", "osd", "r") COMMAND("osd pool set " "name=pool,type=CephPoolname " - 
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio " + "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk " "name=val,type=CephString " "name=yes_i_really_mean_it,type=CephBool,req=false", "set pool parameter to ", "osd", "rw") diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index b563e375092a6..a75be6d4ca25c 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -5357,7 +5357,7 @@ namespace { CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM, PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO, PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM, - DEDUP_CDC_CHUNK_SIZE, POOL_EIO }; + DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK }; std::set subtract_second_from_first(const std::set& first, @@ -6092,6 +6092,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) {"dedup_tier", DEDUP_TIER}, {"dedup_chunk_algorithm", DEDUP_CHUNK_ALGORITHM}, {"dedup_cdc_chunk_size", DEDUP_CDC_CHUNK_SIZE}, + {"bulk", BULK} }; typedef std::set choices_set_t; @@ -6209,6 +6210,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) case HASHPSPOOL: case POOL_EIO: case NODELETE: + case BULK: case NOPGCHANGE: case NOSIZECHANGE: case WRITE_FADVISE_DONTNEED: @@ -6437,6 +6439,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) case HASHPSPOOL: case POOL_EIO: case NODELETE: + case BULK: case NOPGCHANGE: case NOSIZECHANGE: case WRITE_FADVISE_DONTNEED: @@ -7259,11 +7262,12 @@ int OSDMonitor::prepare_new_pool(MonOpRequestRef op) string erasure_code_profile; stringstream ss; string rule_name; + bool bulk = false; int ret = 0; ret = prepare_new_pool(m->name, m->crush_rule, rule_name, 0, 0, 0, 0, 0, 0.0, erasure_code_profile, - 
pg_pool_t::TYPE_REPLICATED, 0, FAST_READ_OFF, {}, + pg_pool_t::TYPE_REPLICATED, 0, FAST_READ_OFF, {}, bulk, &ss); if (ret < 0) { @@ -7885,6 +7889,7 @@ int OSDMonitor::prepare_new_pool(string& name, const uint64_t expected_num_objects, FastReadType fast_read, const string& pg_autoscale_mode, + bool bulk, ostream *ss) { if (name.length() == 0) @@ -8005,6 +8010,11 @@ int OSDMonitor::prepare_new_pool(string& name, pi->type = pool_type; pi->fast_read = fread; pi->flags = g_conf()->osd_pool_default_flags; + if (bulk) { + pi->set_flag(pg_pool_t::FLAG_BULK); + } else if (g_conf()->osd_pool_default_flag_bulk) { + pi->set_flag(pg_pool_t::FLAG_BULK); + } if (g_conf()->osd_pool_default_flag_hashpspool) pi->set_flag(pg_pool_t::FLAG_HASHPSPOOL); if (g_conf()->osd_pool_default_flag_nodelete) @@ -8438,7 +8448,7 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap, p.crush_rule = id; } else if (var == "nodelete" || var == "nopgchange" || var == "nosizechange" || var == "write_fadvise_dontneed" || - var == "noscrub" || var == "nodeep-scrub") { + var == "noscrub" || var == "nodeep-scrub" || var == "bulk") { uint64_t flag = pg_pool_t::get_flag_by_name(var); // make sure we only compare against 'n' if we didn't receive a string if (val == "true" || (interr.empty() && n == 1)) { @@ -12880,6 +12890,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, string pg_autoscale_mode; cmd_getval(cmdmap, "autoscale_mode", pg_autoscale_mode); + bool bulk = cmd_getval_or(cmdmap, "bulk", 0); err = prepare_new_pool(poolstr, -1, // default crush rule rule_name, @@ -12889,6 +12900,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, (uint64_t)expected_num_objects, fast_read, pg_autoscale_mode, + bulk, &ss); if (err < 0) { switch(err) { diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 7757119751638..aa789e2e26255 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -528,6 +528,7 @@ private: const uint64_t expected_num_objects, FastReadType fast_read, const std::string& pg_autoscale_mode, + bool bulk, std::ostream *ss); int prepare_new_pool(MonOpRequestRef op); diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 1168b6dc35600..d449543e204f5 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -4242,6 +4242,8 @@ int OSDMap::build_simple_optioned(CephContext *cct, epoch_t e, uuid_d &fsid, pools[pool].set_flag(pg_pool_t::FLAG_NOPGCHANGE); if (cct->_conf->osd_pool_default_flag_nosizechange) pools[pool].set_flag(pg_pool_t::FLAG_NOSIZECHANGE); + if (cct->_conf->osd_pool_default_flag_bulk) + pools[pool].set_flag(pg_pool_t::FLAG_BULK); pools[pool].size = cct->_conf.get_val("osd_pool_default_size"); pools[pool].min_size = cct->_conf.get_osd_pool_default_min_size( pools[pool].size); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 65c0cf1f4109d..fcc3939ccde00 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1256,6 +1256,7 @@ struct pg_pool_t { FLAG_POOL_SNAPS = 1<<14, // pool has pool snaps FLAG_CREATING = 1<<15, // initial pool PGs are being created FLAG_EIO = 1<<16, // return EIO for all client ops + FLAG_BULK = 1<<17, //pool is large }; static const char *get_flag_name(uint64_t f) { @@ -1277,6 +1278,7 @@ struct pg_pool_t { case FLAG_POOL_SNAPS: return "pool_snaps"; case FLAG_CREATING: return "creating"; case FLAG_EIO: return "eio"; + case FLAG_BULK: return "bulk"; default: return "???"; } } @@ -1329,6 +1331,8 @@ struct pg_pool_t { return FLAG_CREATING; if (name == "eio") return FLAG_EIO; + if (name == "bulk") + return FLAG_BULK; return 0; } 
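
Quick aside on the monitor-side hunks above (a sketch, not part of the patch, and not the actual C++): a new pool ends up with pg_pool_t::FLAG_BULK either because `--bulk` was passed to `ceph osd pool create` or because the new `osd_pool_default_flag_bulk` option is enabled, and the flag can be toggled afterwards with `ceph osd pool set <pool-name> bulk <true|false|1|0>` now that "bulk" is accepted as a boolean pool flag. The helper name below is made up for illustration.

FLAG_BULK = 1 << 17  # mirrors the new pg_pool_t::FLAG_BULK bit defined above


def new_pool_flags(cli_bulk: bool,
                   osd_pool_default_flags: int,
                   osd_pool_default_flag_bulk: bool) -> int:
    # Mirrors prepare_new_pool(): start from osd_pool_default_flags, then set
    # FLAG_BULK if --bulk was given, or fall back to the new config default.
    flags = osd_pool_default_flags
    if cli_bulk or osd_pool_default_flag_bulk:
        flags |= FLAG_BULK
    return flags


# `ceph osd pool create foo --bulk` with the config default left at false:
assert new_pool_flags(True, 0, False) & FLAG_BULK
# no --bulk on the command line, but osd_pool_default_flag_bulk is true:
assert new_pool_flags(False, 0, True) & FLAG_BULK
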
diff --git a/src/pybind/mgr/pg_autoscaler/module.py b/src/pybind/mgr/pg_autoscaler/module.py index 487531bb6a9ee..414afa7ce4954 100644 --- a/src/pybind/mgr/pg_autoscaler/module.py +++ b/src/pybind/mgr/pg_autoscaler/module.py @@ -31,7 +31,7 @@ if TYPE_CHECKING: else: from typing_extensions import Literal - ScaleModeT = Literal['scale-up', 'scale-down'] + PassT = Literal['first', 'second', 'third'] def nearest_power_of_two(n: int) -> int: @@ -126,17 +126,7 @@ class PgAutoscaler(MgrModule): name='sleep_interval', type='secs', default=60), - Option( - 'autoscale_profile', - default='scale-up', - type='str', - desc='pg_autoscale profiler', - long_desc=('Determines the behavior of the autoscaler algorithm, ' - '`scale-up` means that it starts out with minmum pgs ' - 'and scales up when there is pressure' - '`scale-down means start out with full pgs and scales' - 'down when there is pressure'), - runtime=True), + Option( name='threshold', type='float', @@ -156,7 +146,6 @@ class PgAutoscaler(MgrModule): # to just keep a copy of the pythonized version. self._osd_map = None if TYPE_CHECKING: - self.autoscale_profile: 'ScaleModeT' = 'scale-up' self.sleep_interval = 60 self.mon_target_pg_per_osd = 0 self.threshold = 3.0 @@ -173,10 +162,6 @@ class PgAutoscaler(MgrModule): self.get_module_option(opt['name'])) self.log.debug(' mgr option %s = %s', opt['name'], getattr(self, opt['name'])) - # if the profiler option is not set, this means it is an old cluster - autoscale_profile = self.get_module_option("autoscale_profile") - if not autoscale_profile: - self.set_module_option("autoscale_profile", "scale-up") @CLIReadCommand('osd pool autoscale-status') def _command_autoscale_status(self, format: str = 'plain') -> Tuple[int, str, str]: @@ -185,8 +170,7 @@ class PgAutoscaler(MgrModule): """ osdmap = self.get_osdmap() pools = osdmap.get_pools_by_name() - profile = self.autoscale_profile - ps, root_map = self._get_pool_status(osdmap, pools, profile) + ps, root_map = self._get_pool_status(osdmap, pools) if format in ('json', 'json-pretty'): return 0, json.dumps(ps, indent=4, sort_keys=True), '' @@ -199,7 +183,7 @@ class PgAutoscaler(MgrModule): 'PG_NUM', # 'IDEAL', 'NEW PG_NUM', 'AUTOSCALE', - 'PROFILE'], + 'BULK'], border=False) table.left_padding_width = 0 table.right_padding_width = 2 @@ -216,7 +200,7 @@ class PgAutoscaler(MgrModule): # table.align['IDEAL'] = 'r' table.align['NEW PG_NUM'] = 'r' table.align['AUTOSCALE'] = 'l' - table.align['PROFILE'] = 'l' + table.align['BULK'] = 'l' for p in ps: if p['would_adjust']: final = str(p['pg_num_final']) @@ -248,7 +232,7 @@ class PgAutoscaler(MgrModule): # p['pg_num_ideal'], final, p['pg_autoscale_mode'], - profile + str(p['bulk']) ]) return 0, table.get_string(), '' @@ -263,29 +247,6 @@ class PgAutoscaler(MgrModule): self.set_module_option("threshold", num) return 0, "threshold updated", "" - @CLIWriteCommand("osd pool set autoscale-profile scale-up") - def set_profile_scale_up(self) -> Tuple[int, str, str]: - """ - set the autoscaler behavior to start out with minimum pgs and scales up when there is pressure - """ - if self.autoscale_profile == "scale-up": - return 0, "", "autoscale-profile is already a scale-up!" 
- else: - self.set_module_option("autoscale_profile", "scale-up") - return 0, "", "autoscale-profile is now scale-up" - - @CLIWriteCommand("osd pool set autoscale-profile scale-down") - def set_profile_scale_down(self) -> Tuple[int, str, str]: - """ - set the autoscaler behavior to start out with full pgs and - scales down when there is pressure - """ - if self.autoscale_profile == "scale-down": - return 0, "", "autoscale-profile is already a scale-down!" - else: - self.set_module_option("autoscale_profile", "scale-down") - return 0, "", "autoscale-profile is now scale-down" - def serve(self) -> None: self.config_notify() while not self._shutdown.is_set(): @@ -393,73 +354,80 @@ class PgAutoscaler(MgrModule): root_map: Dict[int, CrushSubtreeResourceStatus], root_id: int, capacity_ratio: float, - even_pools: Dict[str, Dict[str, Any]], bias: float, - is_used: bool, - profile: 'ScaleModeT', + even_pools: Dict[str, Dict[str, Any]], + bulk_pools: Dict[str, Dict[str, Any]], + func_pass: 'PassT', + bulk: bool, ) -> Union[Tuple[float, int, int], Tuple[None, None, None]]: """ `profile` determines behaviour of the autoscaler. - `is_used` flag used to determine if this is the first + `first_pass` flag used to determine if this is the first pass where the caller tries to calculate/adjust pools that has used_ratio > even_ratio else this is the second pass, we calculate final_ratio by giving it 1 / pool_count of the root we are currently looking at. """ - if profile == "scale-up": - final_ratio = capacity_ratio - # So what proportion of pg allowance should we be using? - pg_target = root_map[root_id].pg_target - assert pg_target is not None - pool_pg_target = (final_ratio * pg_target) / p['size'] * bias - final_pg_target = max(p.get('options', {}).get('pg_num_min', PG_NUM_MIN), - nearest_power_of_two(pool_pg_target)) - - else: - if is_used: - pool_count = root_map[root_id].pool_count - assert pool_count is not None - even_ratio = 1 / pool_count - used_ratio = capacity_ratio - - if used_ratio > even_ratio: - root_map[root_id].pool_used += 1 - else: - # keep track of even_pools to be used in second pass - # of the caller function - even_pools[pool_name] = p - return None, None, None - - final_ratio = max(used_ratio, even_ratio) - pg_target = root_map[root_id].pg_target - assert pg_target is not None - used_pg = final_ratio * pg_target + if func_pass == 'first': + # first pass to deal with small pools (no bulk flag) + # calculating final_pg_target based on capacity ratio + # we also keep track of bulk_pools to be used in second pass + if not bulk: + final_ratio = capacity_ratio + pg_left = root_map[root_id].pg_left + assert pg_left is not None + used_pg = final_ratio * pg_left root_map[root_id].pg_left -= int(used_pg) + root_map[root_id].pool_used += 1 pool_pg_target = used_pg / p['size'] * bias - else: - pool_count = root_map[root_id].pool_count - assert pool_count is not None - final_ratio = 1 / (pool_count - root_map[root_id].pool_used) - pool_pg_target = (final_ratio * root_map[root_id].pg_left) / p['size'] * bias - - final_pg_target = max(p.get('options', {}).get('pg_num_min', PG_NUM_MIN), - nearest_power_of_two(pool_pg_target)) - - self.log.info("Pool '{0}' root_id {1} using {2} of space, bias {3}, " - "pg target {4} quantized to {5} (current {6})".format( - p['pool_name'], - root_id, - capacity_ratio, - bias, - pool_pg_target, - final_pg_target, - p['pg_num_target'] - )) + bulk_pools[pool_name] = p + return None, None, None + + elif func_pass == 'second': + # second pass we calculate the 
final_pg_target + # for pools that have used_ratio > even_ratio + # and we keep track of even pools to be used in third pass + pool_count = root_map[root_id].pool_count + assert pool_count is not None + even_ratio = 1 / (pool_count - root_map[root_id].pool_used) + used_ratio = capacity_ratio + + if used_ratio > even_ratio: + root_map[root_id].pool_used += 1 + else: + even_pools[pool_name] = p + return None, None, None + + final_ratio = max(used_ratio, even_ratio) + pg_left = root_map[root_id].pg_left + assert pg_left is not None + used_pg = final_ratio * pg_left + root_map[root_id].pg_left -= int(used_pg) + pool_pg_target = used_pg / p['size'] * bias + else: + # third pass we just split the pg_left to all even_pools + pool_count = root_map[root_id].pool_count + assert pool_count is not None + final_ratio = 1 / (pool_count - root_map[root_id].pool_used) + pool_pg_target = (final_ratio * root_map[root_id].pg_left) / p['size'] * bias + + final_pg_target = max(p.get('options', {}).get('pg_num_min', PG_NUM_MIN), + nearest_power_of_two(pool_pg_target)) + self.log.info("Pool '{0}' root_id {1} using {2} of space, bias {3}, " + "pg target {4} quantized to {5} (current {6})".format( + p['pool_name'], + root_id, + capacity_ratio, + bias, + pool_pg_target, + final_pg_target, + p['pg_num_target'] + )) return final_ratio, pool_pg_target, final_pg_target - def _calc_pool_targets( + def _get_pool_pg_targets( self, osdmap: OSDMap, pools: Dict[str, Dict[str, Any]], @@ -468,10 +436,9 @@ class PgAutoscaler(MgrModule): pool_stats: Dict[int, Dict[str, int]], ret: List[Dict[str, Any]], threshold: float, - is_used: bool, - profile: 'ScaleModeT', + func_pass: 'PassT', overlapped_roots: Set[int], - ) -> Tuple[List[Dict[str, Any]], Dict[str, Dict[str, Any]]]: + ) -> Tuple[List[Dict[str, Any]], Dict[str, Dict[str, Any]] , Dict[str, Dict[str, Any]]]: """ Calculates final_pg_target of each pools and determine if it needs scaling, this depends on the profile of the autoscaler. For scale-down, @@ -480,6 +447,7 @@ class PgAutoscaler(MgrModule): the minimal amount of pgs and only scale when there is increase in usage. """ even_pools: Dict[str, Dict[str, Any]] = {} + bulk_pools: Dict[str, Dict[str, Any]] = {} for pool_name, p in pools.items(): pool_id = p['pool'] if pool_id not in pool_stats: @@ -493,8 +461,8 @@ class PgAutoscaler(MgrModule): cr_name = crush_rule['rule_name'] root_id = crush_map.get_rule_root(cr_name) assert root_id is not None - if root_id in overlapped_roots and profile == "scale-down": - # for scale-down profile skip pools + if root_id in overlapped_roots: + # skip pools # with overlapping roots self.log.warn("pool %d contains an overlapping root %d" "... 
skipping scaling", pool_id, root_id) @@ -532,9 +500,17 @@ class PgAutoscaler(MgrModule): root_map[root_id].total_target_bytes, capacity) + # determine if the pool is a bulk + bulk = False + flags = p['flags_names'].split(",") + if "bulk" in flags: + bulk = True + capacity_ratio = max(capacity_ratio, target_ratio) final_ratio, pool_pg_target, final_pg_target = self._calc_final_pg_target( - p, pool_name, root_map, root_id, capacity_ratio, even_pools, bias, is_used, profile) + p, pool_name, root_map, root_id, + capacity_ratio, bias, even_pools, + bulk_pools, func_pass, bulk) if final_ratio is None: continue @@ -567,15 +543,15 @@ class PgAutoscaler(MgrModule): 'pg_num_final': final_pg_target, 'would_adjust': adjust, 'bias': p.get('options', {}).get('pg_autoscale_bias', 1.0), + 'bulk': bulk, }) - return ret, even_pools + return ret, bulk_pools, even_pools def _get_pool_status( self, osdmap: OSDMap, pools: Dict[str, Dict[str, Any]], - profile: 'ScaleModeT', ) -> Tuple[List[Dict[str, Any]], Dict[int, CrushSubtreeResourceStatus]]: threshold = self.threshold @@ -589,19 +565,20 @@ class PgAutoscaler(MgrModule): ret: List[Dict[str, Any]] = [] # Iterate over all pools to determine how they should be sized. - # First call of _calc_pool_targets() is to find/adjust pools that uses more capacaity than + # First call of _get_pool_pg_targets() is to find/adjust pools that uses more capacaity than # the even_ratio of other pools and we adjust those first. # Second call make use of the even_pools we keep track of in the first call. # All we need to do is iterate over those and give them 1/pool_count of the # total pgs. - ret, even_pools = self._calc_pool_targets(osdmap, pools, crush_map, root_map, - pool_stats, ret, threshold, True, profile, overlapped_roots) + ret, bulk_pools, _ = self._get_pool_pg_targets(osdmap, pools, crush_map, root_map, + pool_stats, ret, threshold, 'first', overlapped_roots) + + ret, _, even_pools = self._get_pool_pg_targets(osdmap, bulk_pools, crush_map, root_map, + pool_stats, ret, threshold, 'second', overlapped_roots) - if profile == "scale-down": - # We only have adjust even_pools when we use scale-down profile - ret, _ = self._calc_pool_targets(osdmap, even_pools, crush_map, root_map, - pool_stats, ret, threshold, False, profile, overlapped_roots) + ret, _, _ = self._get_pool_pg_targets(osdmap, even_pools, crush_map, root_map, + pool_stats, ret, threshold, 'third', overlapped_roots) return (ret, root_map) @@ -624,8 +601,7 @@ class PgAutoscaler(MgrModule): if osdmap.get_require_osd_release() < 'nautilus': return pools = osdmap.get_pools_by_name() - profile = self.autoscale_profile - ps, root_map = self._get_pool_status(osdmap, pools, profile) + ps, root_map = self._get_pool_status(osdmap, pools) # Anyone in 'warn', set the health message for them and then # drop them from consideration. -- 2.39.5