]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Add 'read_ratio' pool parameter
authorJosh Salomon <41079547+JoshSalomon@users.noreply.github.com>
Tue, 26 Dec 2023 08:41:18 +0000 (10:41 +0200)
committerLaura Flores <lflores@ibm.com>
Tue, 30 Jan 2024 19:24:43 +0000 (19:24 +0000)
This parameter is used for better read balancing with non-identical
devices.
- This parameter is controlled using the commands 'ceph osd pool set/get'
- This parameter is applicable only for replicated pools
- Valid values are integers in the range [0..100] and represent the
  percentage of read IOs out of all IOs in the pool
  - Value of 0 unsets this parameter and the value will be the default
    value (this is the generic behavior of the command 'ceph osd pool
    set')
  - default value can be set by config parameter
    `osd_pool_default_read_ratio`

Signed-off-by: Josh Salomon <41079547+JoshSalomon@users.noreply.github.com>
src/common/options/global.yaml.in
src/mon/MonCommands.h
src/mon/OSDMonitor.cc
src/osd/osd_types.cc
src/osd/osd_types.h

index 88c896f902626d5b8d33fe10e95bdc4cb8baebab..cebf59304a6e4fd06c1b75ea439b0276e1590706 100644 (file)
@@ -2550,6 +2550,18 @@ options:
   - mon
   flags:
   - runtime
+- name: osd_pool_default_read_ratio
+  type: uint
+  level: advanced
+  desc: Default read ratio (the percent of read IOs out of all IOs) for a pool.
+  long_desc: Default read ratio (the percent of read IOs out of all IOs) for a pool.
+    applicable to replicated pools only. This value is used to improve read balancing
+    when OSDs have different weights.
+  default: 70
+  services:
+  - mon
+  flags:
+  - runtime
 - name: osd_erasure_code_plugins
   type: str
   level: advanced
index 954dcb077fbcb6745ed5177753943fd51f39d032..14bb3602c9b574033987d9775349ed140c723013 100644 (file)
@@ -1137,11 +1137,11 @@ COMMAND("osd pool rename "
        "rename <srcpool> to <destpool>", "osd", "rw")
 COMMAND("osd pool get "
        "name=pool,type=CephPoolname "
-       "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk",
+       "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio",
        "get pool parameter <var>", "osd", "r")
 COMMAND("osd pool set "
        "name=pool,type=CephPoolname "
-       "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk "
+       "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio "
        "name=val,type=CephString "
        "name=yes_i_really_mean_it,type=CephBool,req=false",
        "set pool parameter <var> to <val>", "osd", "rw")
index f8e379326f25f4439e685e75cc18fb391c08ab30..37cb3033189fccdc2cf975fa6939e724cfc654eb 100644 (file)
@@ -5398,7 +5398,7 @@ namespace {
     CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM,
     PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO,
     PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM, 
-    DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK, PG_NUM_MAX };
+    DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK, PG_NUM_MAX, READ_RATIO };
 
   std::set<osd_pool_get_choices>
     subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
@@ -6148,7 +6148,8 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
       {"dedup_tier", DEDUP_TIER},
       {"dedup_chunk_algorithm", DEDUP_CHUNK_ALGORITHM},
       {"dedup_cdc_chunk_size", DEDUP_CDC_CHUNK_SIZE},
-      {"bulk", BULK}
+      {"bulk", BULK},
+      {"read_ratio", READ_RATIO}
     };
 
     typedef std::set<osd_pool_get_choices> choices_set_t;
@@ -6165,6 +6166,9 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
     const choices_set_t ONLY_ERASURE_CHOICES = {
       EC_OVERWRITES, ERASURE_CODE_PROFILE
     };
+    const choices_set_t ONLY_REPLICA_CHOICES = {
+      READ_RATIO
+    };
 
     choices_set_t selected_choices;
     if (var == "all") {
@@ -6182,6 +6186,10 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
        selected_choices = subtract_second_from_first(selected_choices,
                                                      ONLY_ERASURE_CHOICES);
       }
+      if(!p->is_replicated()) {
+        selected_choices = subtract_second_from_first(selected_choices,
+                                                     ONLY_REPLICA_CHOICES);
+      }
     } else /* var != "all" */  {
       choices_map_t::const_iterator found = ALL_CHOICES.find(var);
       if (found == ALL_CHOICES.end()) {
@@ -6210,6 +6218,15 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
        goto reply;
       }
 
+      if (!p->is_replicated() &&
+         ONLY_REPLICA_CHOICES.find(selected)
+         != ONLY_REPLICA_CHOICES.end()) {
+       ss << "pool '" << poolstr
+          << "' is not a replicated pool: variable not applicable";
+       r = -EACCES;
+       goto reply;
+      }
+
       if (pool_opts_t::is_opt_name(var) &&
          !p->opts.is_set(pool_opts_t::get_opt_desc(var).key)) {
        ss << "option '" << var << "' is not set on pool '" << poolstr << "'";
@@ -6378,6 +6395,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
          case DEDUP_TIER:
          case DEDUP_CHUNK_ALGORITHM:
          case DEDUP_CDC_CHUNK_SIZE:
+          case READ_RATIO:
             pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key;
             if (p->opts.is_set(key)) {
               if(*it == CSUM_TYPE) {
@@ -6541,6 +6559,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
          case DEDUP_TIER:
          case DEDUP_CHUNK_ALGORITHM:
          case DEDUP_CDC_CHUNK_SIZE:
+          case READ_RATIO:
            for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) {
              if (i->second == *it)
                break;
@@ -8314,6 +8333,11 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
     return -EACCES;
   }
 
+  if (!p.is_replicated() &&
+      (var == "read_ratio")) {
+    return -EACCES;
+  }
+
   if (var == "size") {
     if (p.has_flag(pg_pool_t::FLAG_NOSIZECHANGE)) {
       ss << "pool size change is disabled; you must unset nosizechange flag for the pool first";
@@ -8948,6 +8972,15 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
         ss << "error parsing int value '" << val << "': " << interr;
         return -EINVAL;
       }
+    } else if (var == "read_ratio") {
+      if (interr.length()) {
+        ss << "error parsing int value '" << val << "': " << interr;
+        return -EINVAL;
+      }
+      if (n < 0 || n > 100) {
+        ss << "read_ratio must be between 0 and 100";
+        return -ERANGE;
+      }
     }
 
     pool_opts_t::opt_desc_t desc = pool_opts_t::get_opt_desc(var);
index 7596723a0e30d49abbeb2ac3859fb4026eb23251..2af3894f1e7dd12fba7bd398ebf739f556eee0d9 100644 (file)
@@ -1376,7 +1376,9 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of
            ("dedup_cdc_chunk_size", pool_opts_t::opt_desc_t(
             pool_opts_t::DEDUP_CDC_CHUNK_SIZE, pool_opts_t::INT))
           ("pg_num_max", pool_opts_t::opt_desc_t(
-             pool_opts_t::PG_NUM_MAX, pool_opts_t::INT));
+             pool_opts_t::PG_NUM_MAX, pool_opts_t::INT))
+          ("read_ratio", pool_opts_t::opt_desc_t(
+             pool_opts_t::READ_RATIO, pool_opts_t::INT));
 
 bool pool_opts_t::is_opt_name(const std::string& name)
 {
index 66f39a91ac4d16a92e8349eab6eb63c4b682b04d..8f08e298ee530bd6bdc3fe3c9dde05b8567b6d32 100644 (file)
@@ -1101,6 +1101,7 @@ public:
     DEDUP_CHUNK_ALGORITHM,
     DEDUP_CDC_CHUNK_SIZE,
     PG_NUM_MAX, // max pg_num
+    READ_RATIO, // read ratio for the read balancer work [0-100]
   };
 
   enum type_t {