From 884546399589ba619eaebec248edc23cd2cb5dfd Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 26 Nov 2018 17:32:55 -0600
Subject: [PATCH] osd/osd_types: add pool pg_autoscale_mode, pg_num_min, target_size_{bytes,ratio} properties

These will direct PG autoscaling.

Signed-off-by: Sage Weil
---
 src/mon/MonCommands.h |  4 ++--
 src/mon/OSDMonitor.cc | 39 ++++++++++++++++++++++++++++++++++++++-
 src/osd/osd_types.cc  | 16 +++++++++++++++-
 src/osd/osd_types.h   | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 184a5aed114..4606ced6cd2 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -963,11 +963,11 @@ COMMAND("osd pool rename " \
         "rename <srcpool> to <destpool>", "osd", "rw")
 COMMAND("osd pool get " \
         "name=pool,type=CephPoolname " \
-        "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm", \
+        "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_num_min|target_size_bytes|target_size_ratio", \
         "get pool parameter <var>", "osd", "r")
 COMMAND("osd pool set " \
         "name=pool,type=CephPoolname " \
-        "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm " \
+        "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_num_min|target_size_bytes|target_size_ratio " \
         "name=val,type=CephString " \
         "name=yes_i_really_mean_it,type=CephBool,req=false", \
         "set pool parameter <var> to <val>", "osd", "rw")
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index b9a64849d1b..4cebaecbfb0 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -4499,7 +4499,8 @@ namespace {
     RECOVERY_PRIORITY, RECOVERY_OP_PRIORITY, SCRUB_PRIORITY,
     COMPRESSION_MODE, COMPRESSION_ALGORITHM, COMPRESSION_REQUIRED_RATIO,
     COMPRESSION_MAX_BLOB_SIZE, COMPRESSION_MIN_BLOB_SIZE,
-    CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM };
+    CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM,
+    PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO };
 
   std::set<osd_pool_get_choices>
     subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
@@ -5107,6 +5108,10 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
       {"csum_max_block", CSUM_MAX_BLOCK},
       {"csum_min_block", CSUM_MIN_BLOCK},
       {"fingerprint_algorithm", FINGERPRINT_ALGORITHM},
+      {"pg_autoscale_mode", PG_AUTOSCALE_MODE},
+      {"pg_num_min", PG_NUM_MIN},
+      {"target_size_bytes", TARGET_SIZE_BYTES},
+      {"target_size_ratio", TARGET_SIZE_RATIO},
     };
 
     typedef std::set<osd_pool_get_choices> choices_set_t;
@@ -5209,6 +5214,11 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
             f->dump_bool("allow_ec_overwrites",
                          p->has_flag(pg_pool_t::FLAG_EC_OVERWRITES));
             break;
+          case PG_AUTOSCALE_MODE:
+            f->dump_string("pg_autoscale_mode",
+                           pg_pool_t::get_pg_autoscale_mode_name(
+                             p->pg_autoscale_mode));
+            break;
           case HASHPSPOOL:
           case NODELETE:
           case NOPGCHANGE:
@@ -5314,6 +5324,9 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
           case CSUM_MAX_BLOCK:
           case CSUM_MIN_BLOCK:
           case FINGERPRINT_ALGORITHM:
+          case PG_NUM_MIN:
+          case TARGET_SIZE_BYTES:
+          case TARGET_SIZE_RATIO:
             pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key;
             if (p->opts.is_set(key)) {
               if(*it == CSUM_TYPE) {
@@ -5354,6 +5367,10 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
               ss << "crush_rule: " << p->get_crush_rule() << "\n";
             }
             break;
+          case PG_AUTOSCALE_MODE:
+            ss << "pg_autoscale_mode: " << pg_pool_t::get_pg_autoscale_mode_name(
+              p->pg_autoscale_mode) << "\n";  // one field per line, like the other cases
+            break;
           case HIT_SET_PERIOD:
             ss << "hit_set_period: " << p->hit_set_period << "\n";
             break;
@@ -5463,6 +5480,9 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
           case CSUM_MAX_BLOCK:
           case CSUM_MIN_BLOCK:
           case FINGERPRINT_ALGORITHM:
+          case PG_NUM_MIN:
+          case TARGET_SIZE_BYTES:
+          case TARGET_SIZE_RATIO:
             for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) {
               if (i->second == *it)
                 break;
@@ -7080,6 +7100,13 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
       return -EINVAL;
     }
     p.set_pgp_num_target(n);
+  } else if (var == "pg_autoscale_mode") {
+    n = pg_pool_t::get_pg_autoscale_mode_by_name(val);
+    if (n < 0) {
+      ss << "specified invalid mode " << val;
+      return -EINVAL;
+    }
+    p.pg_autoscale_mode = n;
   } else if (var == "crush_rule") {
     int id = osdmap.crush->get_rule_id(val);
     if (id == -ENOENT) {
@@ -7357,6 +7384,16 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
         return -EINVAL;
       }
     }
+  } else if (var == "pg_num_min") {
+    if (interr.length()) {
+      ss << "error parsing int value '" << val << "': " << interr;
+      return -EINVAL;
+    }
+    if (n > (int)p.get_pg_num_target()) {
+      ss << "specified pg_num_min " << n
+         << " > pg_num " << p.get_pg_num_target();
+      return -EINVAL;
+    }
   }
 
   pool_opts_t::opt_desc_t desc = pool_opts_t::get_opt_desc(var);
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 2b548536800..102c3a80dff 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -1110,7 +1110,13 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of
            ("csum_min_block", pool_opts_t::opt_desc_t(
              pool_opts_t::CSUM_MIN_BLOCK, pool_opts_t::INT))
            ("fingerprint_algorithm", pool_opts_t::opt_desc_t(
-             pool_opts_t::FINGERPRINT_ALGORITHM, pool_opts_t::STR));
+             pool_opts_t::FINGERPRINT_ALGORITHM, pool_opts_t::STR))
+           ("pg_num_min", pool_opts_t::opt_desc_t(
+             pool_opts_t::PG_NUM_MIN, pool_opts_t::INT))
+           ("target_size_bytes", pool_opts_t::opt_desc_t(
+             pool_opts_t::TARGET_SIZE_BYTES, pool_opts_t::INT))
+           ("target_size_ratio", pool_opts_t::opt_desc_t(
+             pool_opts_t::TARGET_SIZE_RATIO, pool_opts_t::DOUBLE));
 
 bool pool_opts_t::is_opt_name(const std::string& name) {
     return opt_mapping.count(name);
@@ -1273,6 +1279,8 @@ void pg_pool_t::dump(Formatter *f) const
   f->dump_int("min_size", get_min_size());
   f->dump_int("crush_rule", get_crush_rule());
   f->dump_int("object_hash", get_object_hash());
+  f->dump_string("pg_autoscale_mode",
+                 get_pg_autoscale_mode_name(pg_autoscale_mode));
   f->dump_unsigned("pg_num", get_pg_num());
   f->dump_unsigned("pg_placement_num", get_pgp_num());
   f->dump_unsigned("pg_placement_num_target", get_pgp_num_target());
@@ -1786,6 +1794,7 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
     encode(pg_num_dec_last_epoch_started, bl);
     encode(pg_num_dec_last_epoch_clean, bl);
     encode(last_force_op_resend, bl);
+    encode(pg_autoscale_mode, bl);
   }
   ENCODE_FINISH(bl);
 }
@@ -1952,11 +1961,13 @@ void pg_pool_t::decode(bufferlist::const_iterator& bl)
     decode(pg_num_dec_last_epoch_started, bl);
     decode(pg_num_dec_last_epoch_clean, bl);
     decode(last_force_op_resend, bl);
+    decode(pg_autoscale_mode, bl);
   } else {
     pg_num_target = pg_num;
     pgp_num_target = pgp_num;
     pg_num_pending = pg_num;
     last_force_op_resend = last_force_op_resend_prenautilus;
+    pg_autoscale_mode = PG_AUTOSCALE_MODE_WARN;    // default to warn on upgrade
   }
   DECODE_FINISH(bl);
   calc_pg_masks();
@@ -2055,6 +2066,9 @@ ostream& operator<<(ostream& out, const pg_pool_t& p)
     out << " dles/c " << p.get_pg_num_dec_last_epoch_started() << "/"
         << p.get_pg_num_dec_last_epoch_clean();
   }
+  if (p.pg_autoscale_mode) {
+    out << " autoscale_mode " << p.get_pg_autoscale_mode_name(p.pg_autoscale_mode);
+  }
   out << " last_change " << p.get_last_change();
   if (p.get_last_force_op_resend() ||
       p.get_last_force_op_resend_prenautilus() ||
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 0a82a0ac356..3b48e4d567e 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -1007,6 +1007,9 @@ public:
     CSUM_MAX_BLOCK,
     CSUM_MIN_BLOCK,
     FINGERPRINT_ALGORITHM,
+    PG_NUM_MIN,         // min pg_num
+    TARGET_SIZE_BYTES,  // total bytes in pool
+    TARGET_SIZE_RATIO,  // fraction of total cluster
   };
 
   enum type_t {
@@ -1244,12 +1247,42 @@ struct pg_pool_t {
     }
   }
 
+  /// values stored in pg_autoscale_mode; the numeric values are encoded, so keep them stable
+  enum {
+    PG_AUTOSCALE_MODE_OFF = 0,
+    PG_AUTOSCALE_MODE_WARN = 1,
+    PG_AUTOSCALE_MODE_ON = 2,
+  };
+  /// @return the name ("off", "warn", "on") for mode m, or "???" if m is not a known mode
+  static const char *get_pg_autoscale_mode_name(int m) {
+    switch (m) {
+    case PG_AUTOSCALE_MODE_OFF: return "off";
+    case PG_AUTOSCALE_MODE_ON: return "on";
+    case PG_AUTOSCALE_MODE_WARN: return "warn";
+    default: return "???";
+    }
+  }
+  /// @return the PG_AUTOSCALE_MODE_* value named by m, or -1 if m is not a known name
+  static int get_pg_autoscale_mode_by_name(const string& m) {
+    if (m == "off") {
+      return PG_AUTOSCALE_MODE_OFF;
+    }
+    if (m == "warn") {
+      return PG_AUTOSCALE_MODE_WARN;
+    }
+    if (m == "on") {
+      return PG_AUTOSCALE_MODE_ON;
+    }
+    return -1;
+  }
+
   utime_t create_time;
   uint64_t flags;           ///< FLAG_*
   __u8 type;                ///< TYPE_*
   __u8 size, min_size;      ///< number of osds in each pg
   __u8 crush_rule;          ///< crush placement rule
   __u8 object_hash;         ///< hash mapping object name to ps
+  __u8 pg_autoscale_mode = PG_AUTOSCALE_MODE_OFF;  ///< PG_AUTOSCALE_MODE_*; initialized so new pools never encode garbage
 private:
   __u32 pg_num = 0, pgp_num = 0;  ///< number of pgs
   __u32 pg_num_pending = 0;       ///< pg_num we are about to merge down to
-- 
2.39.5