]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Add EC_OPTIMIZATIONS flag and add ability to switch it on.
authorAlex Ainscow <aainscow@uk.ibm.com>
Tue, 11 Mar 2025 10:59:13 +0000 (10:59 +0000)
committerAlex Ainscow <aainscow@uk.ibm.com>
Wed, 2 Apr 2025 22:34:08 +0000 (23:34 +0100)
This is the basic command. On its own, this does nothing functional, except set
a flag which can be queried.

Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
src/mon/MonCommands.h
src/mon/OSDMonitor.cc
src/osd/osd_types.h

index fc32051badcecbff0ebd70b25a2937a82d51cc51..91dd806c6ec174692b267f8a23134342cbacc644 100644 (file)
@@ -1170,11 +1170,11 @@ COMMAND("osd pool rename "
        "rename <srcpool> to <destpool>", "osd", "rw")
 COMMAND("osd pool get "
        "name=pool,type=CephPoolname "
-       "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay",
+       "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay|allow_ec_optimizations",
        "get pool parameter <var>", "osd", "r")
 COMMAND("osd pool set "
        "name=pool,type=CephPoolname "
-       "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay "
+       "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay|allow_ec_optimizations "
        "name=val,type=CephString "
        "name=yes_i_really_mean_it,type=CephBool,req=false",
        "set pool parameter <var> to <val>", "osd", "rw")
index 928492fb2d092d636f467204d97fff6d9f70fc4e..495a78e4293b21c06d2647913ab042ab6df804e8 100644 (file)
@@ -5410,7 +5410,8 @@ namespace {
     CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM,
     PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO,
     PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM, 
-    DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK, PG_NUM_MAX, READ_RATIO };
+    DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK, PG_NUM_MAX, READ_RATIO,
+    EC_OPTIMIZATIONS };
 
   std::set<osd_pool_get_choices>
     subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
@@ -6215,7 +6216,8 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
       {"dedup_chunk_algorithm", DEDUP_CHUNK_ALGORITHM},
       {"dedup_cdc_chunk_size", DEDUP_CDC_CHUNK_SIZE},
       {"bulk", BULK},
-      {"read_ratio", READ_RATIO}
+      {"read_ratio", READ_RATIO},
+      {"allow_ec_optimizations", EC_OPTIMIZATIONS}
     };
 
     typedef std::set<osd_pool_get_choices> choices_set_t;
@@ -6230,7 +6232,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
       HIT_SET_GRADE_DECAY_RATE, HIT_SET_SEARCH_LAST_N
     };
     const choices_set_t ONLY_ERASURE_CHOICES = {
-      EC_OVERWRITES, ERASURE_CODE_PROFILE
+      EC_OVERWRITES, ERASURE_CODE_PROFILE, EC_OPTIMIZATIONS
     };
     const choices_set_t ONLY_REPLICA_CHOICES = {
       READ_RATIO
@@ -6462,17 +6464,23 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
          case DEDUP_CHUNK_ALGORITHM:
          case DEDUP_CDC_CHUNK_SIZE:
           case READ_RATIO:
-            pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key;
-            if (p->opts.is_set(key)) {
-              if(*it == CSUM_TYPE) {
-                int64_t val;
-                p->opts.get(pool_opts_t::CSUM_TYPE, &val);
-                f->dump_string(i->first.c_str(), Checksummer::get_csum_type_string(val));
-              } else {
-                p->opts.dump(i->first, f.get());
-              }
+           {
+             pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key;
+             if (p->opts.is_set(key)) {
+               if(*it == CSUM_TYPE) {
+                 int64_t val;
+                 p->opts.get(pool_opts_t::CSUM_TYPE, &val);
+                 f->dump_string(i->first.c_str(), Checksummer::get_csum_type_string(val));
+               } else {
+                 p->opts.dump(i->first, f.get());
+               }
+             }
            }
             break;
+         case EC_OPTIMIZATIONS:
+           f->dump_bool("allow_ec_optimizations",
+                        p->has_flag(pg_pool_t::FLAG_EC_OPTIMIZATIONS));
+           break;
        }
       }
       f->close_section();
@@ -6644,6 +6652,11 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
              }
            }
            break;
+         case EC_OPTIMIZATIONS:
+           ss << "allow_ec_optimizations: " <<
+             (p->has_flag(pg_pool_t::FLAG_EC_OPTIMIZATIONS) ? "true" : "false") <<
+             "\n";
+           break;
        }
        rdata.append(ss.str());
        ss.str("");
@@ -8796,8 +8809,31 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
     if (val == "true" || (interr.empty() && n == 1)) {
        p.flags |= pg_pool_t::FLAG_EC_OVERWRITES;
     } else if (val == "false" || (interr.empty() && n == 0)) {
-      ss << "ec overwrites cannot be disabled once enabled";
+      if ((p.flags & pg_pool_t::FLAG_EC_OVERWRITES) != 0) {
+       ss << "ec overwrites cannot be disabled once enabled";
+       return -EINVAL;
+      }
+    } else {
+      ss << "expecting value 'true', 'false', '0', or '1'";
       return -EINVAL;
+    }
+  } else if (var == "allow_ec_optimizations") {
+    if (!p.is_erasure()) {
+      ss << "allow_ec_optimizations can only be enabled for an erasure coded pool";
+      return -EINVAL;
+    }
+    if (osdmap.require_osd_release < ceph_release_t::tentacle) {
+      ss << "All OSDs must be upgraded to tentacle or "
+           << "later before setting allow_ec_optimizations";
+        return -EINVAL;
+      }
+    if (val == "true" || (interr.empty() && n == 1)) {
+      p.flags |= pg_pool_t::FLAG_EC_OPTIMIZATIONS;
+    } else if (val == "false" || (interr.empty() && n == 0)) {
+      if ((p.flags & pg_pool_t::FLAG_EC_OPTIMIZATIONS) != 0) {
+       ss << "allow_ec_optimizations cannot be disabled once enabled";
+       return -EINVAL;
+      }
     } else {
       ss << "expecting value 'true', 'false', '0', or '1'";
       return -EINVAL;
index c654cb71cd685b74e4bda3a946d8bc0f8f358266..e30f160b7cb53a3b63ccb25ff41ee75f37baddb1 100644 (file)
@@ -1297,6 +1297,7 @@ struct pg_pool_t {
     // Pool features are restricted to those supported by crimson-osd.
     // Note, does not prohibit being created on classic osd.
     FLAG_CRIMSON = 1<<18,
+    FLAG_EC_OPTIMIZATIONS = 1<<19, // enable optimizations, once enabled, cannot be disabled
   };
 
   static const char *get_flag_name(uint64_t f) {
@@ -1320,6 +1321,7 @@ struct pg_pool_t {
     case FLAG_EIO: return "eio";
     case FLAG_BULK: return "bulk";
     case FLAG_CRIMSON: return "crimson";
+    case FLAG_EC_OPTIMIZATIONS: return "ec_optimizations";
     default: return "???";
     }
   }
@@ -1376,6 +1378,8 @@ struct pg_pool_t {
       return FLAG_BULK;
     if (name == "crimson")
       return FLAG_CRIMSON;
+    if (name == "ec_optimizations")
+      return FLAG_EC_OPTIMIZATIONS;
     return 0;
   }
 
@@ -1790,6 +1794,10 @@ public:
     return has_flag(FLAG_EC_OVERWRITES);
   }
 
+  bool allows_ecoptimizations() const {
+    return has_flag(FLAG_EC_OPTIMIZATIONS);
+  }
+
   bool is_crimson() const {
     return has_flag(FLAG_CRIMSON);
   }