Introduced a "supports_omap" pool flag which is always enabled for Replicated pools and currently always disabled for EC pools.
Introduced wrappers around omap read operations in PGBackend to include updates from the journal in EC pools with optimisations enabled.
Introduced a function for encoding an EC_OMAP operation in the ObjectModDesc::Visitor class and a function for committing an operation in the Trimmer struct.
Signed-off-by: Matty Williams <Matty.Williams@ibm.com>
"rename <srcpool> to <destpool>", "osd", "rw")
COMMAND("osd pool get "
"name=pool,type=CephPoolname "
- "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay|allow_ec_optimizations|ec_data_shard_count|ec_coding_shard_count",
+ "name=var,type=CephChoices,strings=all"
+ "|allow_ec_optimizations"
+ "|allow_ec_overwrites"
+ "|bulk"
+ "|cache_min_evict_age"
+ "|cache_min_flush_age"
+ "|cache_target_dirty_high_ratio"
+ "|cache_target_dirty_ratio"
+ "|cache_target_full_ratio"
+ "|compression_algorithm"
+ "|compression_max_blob_size"
+ "|compression_min_blob_size"
+ "|compression_mode"
+ "|compression_required_ratio"
+ "|crush_rule"
+ "|csum_max_block"
+ "|csum_min_block"
+ "|csum_type"
+ "|deep_scrub_interval"
+ "|dedup_cdc_chunk_size"
+ "|dedup_chunk_algorithm"
+ "|dedup_tier"
+ "|ec_coding_shard_count"
+ "|ec_data_shard_count"
+ "|eio"
+ "|erasure_code_profile"
+ "|fast_read"
+ "|fingerprint_algorithm"
+ "|hashpspool"
+ "|hit_set_count"
+ "|hit_set_fpp"
+ "|hit_set_grade_decay_rate"
+ "|hit_set_period"
+ "|hit_set_search_last_n"
+ "|hit_set_type"
+ "|min_read_recency_for_promote"
+ "|min_size"
+ "|min_write_recency_for_promote"
+ "|nodeep-scrub"
+ "|nodelete"
+ "|nopgchange"
+ "|noscrub"
+ "|nosizechange"
+ "|pct_update_delay"
+ "|pg_autoscale_bias"
+ "|pg_autoscale_mode"
+ "|pg_num"
+ "|pg_num_max"
+ "|pg_num_min"
+ "|pgp_num"
+ "|read_ratio"
+ "|recovery_op_priority"
+ "|recovery_priority"
+ "|scrub_max_interval"
+ "|scrub_min_interval"
+ "|scrub_priority"
+ "|size"
+ "|supports_omap"
+ "|target_max_bytes"
+ "|target_max_objects"
+ "|target_size_bytes"
+ "|target_size_ratio"
+ "|use_gmt_hitset"
+ "|write_fadvise_dontneed",
"get pool parameter <var>", "osd", "r")
COMMAND("osd pool set "
"name=pool,type=CephPoolname "
- "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk|read_ratio|pct_update_delay|allow_ec_optimizations|set_pool_flags|unset_pool_flags "
+ "name=var,type=CephChoices,strings=allow_ec_optimizations"
+ "|allow_ec_overwrites"
+ "|bulk"
+ "|cache_min_evict_age"
+ "|cache_min_flush_age"
+ "|cache_target_dirty_high_ratio"
+ "|cache_target_dirty_ratio"
+ "|cache_target_full_ratio"
+ "|compression_algorithm"
+ "|compression_max_blob_size"
+ "|compression_min_blob_size"
+ "|compression_mode"
+ "|compression_required_ratio"
+ "|crush_rule"
+ "|csum_max_block"
+ "|csum_min_block"
+ "|csum_type"
+ "|deep_scrub_interval"
+ "|dedup_cdc_chunk_size"
+ "|dedup_chunk_algorithm"
+ "|dedup_tier"
+ "|eio"
+ "|fast_read"
+ "|fingerprint_algorithm"
+ "|hashpspool"
+ "|hit_set_count"
+ "|hit_set_fpp"
+ "|hit_set_grade_decay_rate"
+ "|hit_set_period"
+ "|hit_set_search_last_n"
+ "|hit_set_type"
+ "|min_read_recency_for_promote"
+ "|min_size"
+ "|min_write_recency_for_promote"
+ "|nodeep-scrub"
+ "|nodelete"
+ "|nopgchange"
+ "|noscrub"
+ "|nosizechange"
+ "|pct_update_delay"
+ "|pg_autoscale_bias"
+ "|pg_autoscale_mode"
+ "|pg_num"
+ "|pg_num_max"
+ "|pg_num_min"
+ "|pgp_num"
+ "|pgp_num_actual"
+ "|read_ratio"
+ "|recovery_op_priority"
+ "|recovery_priority"
+ "|scrub_max_interval"
+ "|scrub_min_interval"
+ "|scrub_priority"
+ "|set_pool_flags"
+ "|size"
+ "|supports_omap"
+ "|target_max_bytes"
+ "|target_max_objects"
+ "|target_size_bytes"
+ "|target_size_ratio"
+ "|unset_pool_flags"
+ "|use_gmt_hitset"
+ "|write_fadvise_dontneed "
"name=val,type=CephString "
"name=yes_i_really_mean_it,type=CephBool,req=false",
"set pool parameter <var> to <val>", "osd", "rw")
maybe_enable_pool_split_ops(pending_inc.new_pools[id]);
}
}
+
+ // Auto-enable omap support for replicated pools
+ for (auto& [pool_id, pool] : tmp.get_pools()) {
+ if (!pool.has_flag(pg_pool_t::FLAG_OMAP) && pool.is_replicated()) {
+ pg_pool_t p = pool;
+ p.flags |= pg_pool_t::FLAG_OMAP;
+ pending_inc.new_pools[pool_id] = p;
+ dout(10) << __func__ << " replicated pool " << pool_id
+ << " has OMAP support auto-enabled" << dendl;
+ }
+ }
}
}
PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO,
PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM,
DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK, PG_NUM_MAX, READ_RATIO,
- EC_OPTIMIZATIONS, EC_DATA_SHARD_COUNT, EC_CODING_SHARD_COUNT };
+ EC_OPTIMIZATIONS, EC_DATA_SHARD_COUNT, EC_CODING_SHARD_COUNT,
+ SUPPORTS_OMAP };
std::set<osd_pool_get_choices>
subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
{"allow_ec_optimizations", EC_OPTIMIZATIONS},
{"ec_data_shard_count", EC_DATA_SHARD_COUNT},
{"ec_coding_shard_count", EC_CODING_SHARD_COUNT},
+ {"supports_omap", SUPPORTS_OMAP},
};
typedef std::set<osd_pool_get_choices> choices_set_t;
f->dump_unsigned("ec_coding_shard_count",
p->ec_coding_shard_count.value_or(0));
break;
+ case SUPPORTS_OMAP:
+ f->dump_bool("supports_omap", p->supports_omap());
+ break;
}
}
f->close_section();
<< static_cast<unsigned int>(p->ec_coding_shard_count.value_or(0))
<< "\n";
break;
+ case SUPPORTS_OMAP:
+ ss << "supports_omap: " <<
+ (p->supports_omap() ? "true" : "false") << "\n";
+ break;
}
rdata.append(ss.str());
ss.str("");
if (crimson) {
pi->set_flag(pg_pool_t::FLAG_CRIMSON);
}
+ if (pool_type == pg_pool_t::TYPE_REPLICATED
+ && osdmap.require_osd_release >= ceph_release_t::umbrella) {
+ pi->set_flag(pg_pool_t::FLAG_OMAP);
+ }
pi->size = size;
pi->min_size = min_size;
} else {
p.unset_flag(n);
}
+ } else if (var == "supports_omap") {
+ if ((val == "true") && osdmap.require_osd_release < ceph_release_t::umbrella) {
+ ss << "supports_omap cannot be enabled until require_osd_release is set to umbrella or later";
+ return -EPERM;
+ }
+ // Disabling omap support will leave omap data in RocksDB which cannot be cleaned up
+ // It will also break any services that depend on this pool to store metadata
+ if ((val == "false") && (p.has_flag(pg_pool_t::FLAG_OMAP))) {
+ ss << "supports_omap cannot be disabled once enabled";
+ return -EINVAL;
+ }
+ // This restriction is temporary until omap support is well tested in Fast EC pools
+ if ((val == "true") && p.is_erasure()) {
+ ss << "supports_omap cannot be enabled in ec pools";
+ return -EINVAL;
+ }
+ if (val == "true") {
+ p.flags |= pg_pool_t::FLAG_OMAP;
+ }
} else if (var == "target_max_objects") {
if (interr.length()) {
ss << "error parsing int '" << val << "': " << interr;
o.omap_digest_present = true;
return 0;
}
+
+bool ECBackend::remove_ec_omap_journal_entry(const hobject_t &hoid, const ECOmapJournalEntry &entry) {
+ return ec_omap_journal.remove_entry(hoid, entry);
+}
+
+std::pair<gen_t, bool> ECBackend::omap_get_generation(const hobject_t& hoid) {
+ return ec_omap_journal.get_generation(hoid);
+}
+
+void ECBackend::omap_trim_delete_from_journal(const hobject_t &hoid, const version_t version) {
+ return ec_omap_journal.trim_delete(hoid, version);
+}
+
+int ECBackend::omap_iterate (
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const ObjectStore::omap_iter_seek_t &start_from,
+ ///^ [in] where the iterator should point to at the beginning
+ const OmapIterFunction &f, ///< [in] function to call for each key/value pair
+ ObjectStore *store
+) {
+ // Updates in update_map take priority over removed_ranges
+ auto [update_map, removed_ranges] = ec_omap_journal.get_value_updates(oid.hobj);
+
+ auto journal_it = update_map.begin();
+ if (!start_from.seek_position.empty()) {
+ journal_it = update_map.lower_bound(start_from.seek_position);
+ }
+
+ auto wrapper = [&](const std::string_view store_key, const std::string_view store_value) {
+ bool found_store_key_in_journal = false;
+
+ while (journal_it != update_map.end() && journal_it->first <= store_key) {
+ if (journal_it->first == store_key) {
+ found_store_key_in_journal = true;
+ }
+ if (journal_it->second.value.has_value()) {
+ ObjectStore::omap_iter_ret_t r = f(
+ journal_it->first,
+ std::string_view(
+ journal_it->second.value->c_str(),
+ journal_it->second.value->length()
+ )
+ );
+ if (r == ObjectStore::omap_iter_ret_t::STOP) {
+ return r;
+ }
+ }
+ ++journal_it;
+ }
+
+ if (found_store_key_in_journal) {
+ return ObjectStore::omap_iter_ret_t::NEXT;
+ }
+
+ if (should_be_removed(removed_ranges, store_key)) {
+ return ObjectStore::omap_iter_ret_t::NEXT;
+ }
+
+ return f(store_key, store_value);
+ };
+
+ if (const auto result = store->omap_iterate(c_, oid, start_from, wrapper);
+ result < 0) {
+ return result;
+ } else if (result > 0) {
+ return 1;
+ }
+
+ auto ret = ObjectStore::omap_iter_ret_t::NEXT;
+ while (journal_it != update_map.end()) {
+ if (journal_it->second.value.has_value()) {
+ ret = f(journal_it->first, std::string_view(journal_it->second.value->c_str(), journal_it->second.value->length()));
+ if (ret == ObjectStore::omap_iter_ret_t::STOP) {
+ break;
+ }
+ }
+ ++journal_it;
+ }
+
+ return ret == ObjectStore::omap_iter_ret_t::STOP ? 1 : 0;
+}
+
+int ECBackend::omap_get_values(
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const std::set<std::string> &keys, ///< [in] keys to get
+ std::map<std::string, ceph::buffer::list> *out, ///< [out] returned key/values
+ ObjectStore *store
+) {
+ auto [update_map, removed_ranges] = ec_omap_journal.get_value_updates(oid.hobj);
+
+ set<string> keys_still_to_get;
+ for (auto &key : keys) {
+ if (auto it = update_map.find(key);
+ it != update_map.end()) {
+ if (!it->second.value.has_value()) {
+ continue;
+ }
+ (*out)[key] = *(it->second.value);
+ } else if (should_be_removed(removed_ranges, key)) {
+ continue;
+ } else {
+ keys_still_to_get.insert(key);
+ }
+ }
+ store->omap_get_values(c_, oid, keys_still_to_get, out);
+
+ return 0;
+}
+
+int ECBackend::omap_get_header(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ const bool allow_eio, ///< [in] don't assert on eio
+ ObjectStore *store
+) {
+ std::optional<ceph::buffer::list> header_from_journal = ec_omap_journal.get_updated_header(oid.hobj);
+ if (header_from_journal) {
+ *header = *header_from_journal;
+ } else {
+ store->omap_get_header(c_, oid, header, allow_eio);
+ }
+ return 0;
+}
+
+int ECBackend::omap_get(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ std::map<std::string, ceph::buffer::list> *out, /// < [out] Key to value map
+ ObjectStore *store
+) {
+ // Update map takes priority over removed_ranges
+ auto [update_map, removed_ranges] = ec_omap_journal.get_value_updates(oid.hobj);
+ const auto updated_header = ec_omap_journal.get_updated_header(oid.hobj);
+
+ if (const int r = store->omap_get(c_, oid, header, out);
+ r < 0) {
+ return r;
+ }
+
+ // Update header if present
+ if (updated_header) {
+ *header = *updated_header;
+ }
+
+ // Remove keys in removed_ranges
+ for (auto out_it = out->begin(); out_it != out->end(); ++out_it) {
+ if (should_be_removed(removed_ranges, out_it->first)) {
+ out->erase(out_it->first);
+ }
+ }
+
+ // Apply updates in update_map
+ for (const auto &[key, val_opt] : update_map) {
+ if (val_opt.value.has_value()) {
+ (*out)[key] = *(val_opt.value);
+ } else {
+ out->erase(key);
+ }
+ }
+
+ return 0;
+}
+
+int ECBackend::omap_check_keys(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ const std::set<std::string> &keys, ///< [in] Keys to check
+ std::set<std::string> *out, ///< [out] Subset of keys defined on oid
+ ObjectStore *store
+) {
+ // Update map takes priority over removed_ranges
+ auto [update_map, removed_ranges] = ec_omap_journal.get_value_updates(oid.hobj);
+ auto updated_header = ec_omap_journal.get_updated_header(oid.hobj);
+
+ // First check keys in update_map and removed_ranges
+ set<string> keys_to_check_on_disk;
+ for (const auto &key : keys) {
+ if (auto it = update_map.find(key);
+ it != update_map.end()) {
+ if (it->second.value.has_value()) {
+ out->insert(key);
+ }
+ } else if (should_be_removed(removed_ranges, key)) {
+ continue;
+ } else {
+ keys_to_check_on_disk.insert(key);
+ }
+ }
+
+ const int r = store->omap_check_keys(c_, oid, keys_to_check_on_disk, out);
+
+ return r;
+}
+
+bool ECBackend::should_be_removed(
+ const std::map<std::string, std::optional<std::string>>& removed_ranges,
+ const std::string_view key) {
+ if (removed_ranges.empty()) {
+ return false;
+ }
+
+ // Find range that comes after this key
+ auto it = removed_ranges.upper_bound(std::string(key));
+
+ // If all ranges start after the key, it can't be in any range
+ if (it == removed_ranges.begin()) {
+ return false;
+ }
+
+ // Go back to the previous range
+ --it;
+ // If this range contains the key, return true
+ const auto& end_opt = it->second;
+ if (!end_opt || key < *end_opt) {
+ return true;
+ }
+
+ // No ranges contain the key, return false
+ return false;
+}
}
return object_size_to_shard_size(logical_size, shard_id);
}
+
+ bool remove_ec_omap_journal_entry(const hobject_t &hoid, const ECOmapJournalEntry &entry);
+ std::pair<gen_t, bool> omap_get_generation(const hobject_t &hoid);
+ void omap_trim_delete_from_journal(const hobject_t &hoid, const version_t version);
+
+ using OmapIterFunction = std::function<ObjectStore::omap_iter_ret_t(std::string_view, std::string_view)>;
+ int omap_iterate (
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const ObjectStore::omap_iter_seek_t &start_from, ///< [in] where the iterator should point to at the beginning
+ const OmapIterFunction &f, ///< [in] function to call for each key/value pair
+ ObjectStore *store
+ );
+
+ int omap_get_values(
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const std::set<std::string> &keys, ///< [in] keys to get
+ std::map<std::string, ceph::buffer::list> *out, ///< [out] returned key/values
+ ObjectStore *store
+ );
+
+ int omap_get_header(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ bool allow_eio, ///< [in] don't assert on eio
+ ObjectStore *store
+ );
+
+ int omap_get(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ std::map<std::string, ceph::buffer::list> *out, /// < [out] Key to value map
+ ObjectStore *store
+ );
+
+ int omap_check_keys(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ const std::set<std::string> &keys, ///< [in] Keys to check
+ std::set<std::string> *out, ///< [out] Subset of keys defined on oid
+ ObjectStore *store
+ );
+
+ static bool should_be_removed(
+ const std::map<std::string, std::optional<std::string>>& removed_ranges,
+ const std::string_view key
+ );
};
#include "ECTypes.h"
#include "messages/MOSDPGPushReply.h"
#include "msg/MessageRef.h"
+#include "osd/ECOmapJournal.h"
#if WITH_CRIMSON
#include "crimson/osd/object_context.h"
#include "os/Transaction.h"
bool get_is_ec_optimized() const final {
return is_optimized();
}
+ bool remove_ec_omap_journal_entry(const hobject_t &hoid, const ECOmapJournalEntry &entry) override {
+ ceph_assert(is_optimized());
+ return optimized.remove_ec_omap_journal_entry(hoid, entry);
+ }
+
+ std::pair<gen_t, bool> omap_get_generation(const hobject_t &hoid) override {
+ ceph_assert(is_optimized());
+ return optimized.omap_get_generation(hoid);
+ }
+
+ void omap_trim_delete_from_journal(const hobject_t &hoid, const version_t version) override {
+ ceph_assert(is_optimized());
+ optimized.omap_trim_delete_from_journal(hoid, version);
+ }
+
+ int omap_iterate (
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const ObjectStore::omap_iter_seek_t &start_from,
+ ///^ [in] where the iterator should point to at the beginning
+ const OmapIterFunction &f ///< [in] function to call for each key/value pair
+ ) override {
+ if (!is_optimized()) {
+ return store->omap_iterate(c_, oid, start_from, f);
+ }
+ return optimized.omap_iterate(c_, oid, start_from, f, store);
+ }
+
+ int omap_get_values(
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const std::set<std::string> &keys, ///< [in] keys to get
+ std::map<std::string, ceph::buffer::list> *out ///< [out] returned key/values
+ ) override {
+ if (!is_optimized()) {
+ return store->omap_get_values(c_, oid, keys, out);
+ }
+ return optimized.omap_get_values(c_, oid, keys, out, store);
+ }
+
+ int omap_get_header(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ bool allow_eio ///< [in] don't assert on eio
+ ) override {
+ if (!is_optimized()) {
+ return store->omap_get_header(c_, oid, header, allow_eio);
+ }
+ return optimized.omap_get_header(c_, oid, header, allow_eio, store);
+ }
+
+ int omap_get(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value map
+ ) override {
+ if (!is_optimized()) {
+ return store->omap_get(c_, oid, header, out);
+ }
+ return optimized.omap_get(c_, oid, header, out, store);
+ }
+
+ int omap_check_keys(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ const std::set<std::string> &keys, ///< [in] Keys to check
+ std::set<std::string> *out ///< [out] Subset of keys defined on oid
+ ) override {
+ if (!is_optimized()) {
+ return store->omap_check_keys(c_, oid, keys, out);
+ }
+ return optimized.omap_check_keys(c_, oid, keys, out, store);
+ }
};
const hobject_t &soid;
PGBackend *pg;
ObjectStore::Transaction *t;
+ const pg_log_entry_t &entry;
Trimmer(
- const hobject_t &soid,
PGBackend *pg,
- ObjectStore::Transaction *t)
- : soid(soid), pg(pg), t(t) {}
+ ObjectStore::Transaction *t,
+ const pg_log_entry_t &entry)
+ : soid(entry.soid), pg(pg), t(t), entry(entry) {}
void rmobject(version_t old_version) override {
pg->trim_rollback_object(
soid,
}
}
}
+
+ void ec_omap(bool clear_omap, std::optional<ceph::buffer::list> omap_header,
+ std::vector<std::pair<OmapUpdateType, ceph::buffer::list>> &omap_updates) override {
+
+ auto shard = pg->get_parent()->whoami_shard().shard;
+ spg_t spg = pg->get_parent()->whoami_spg_t();
+ auto sinfo = pg->ec_get_sinfo();
+ const auto [gen, lost_delete] = pg->omap_get_generation(soid);
+
+ if (!sinfo.is_nonprimary_shard(shard)) {
+ // If lost_delete is true, check if the object exists before performing updates
+ bool should_update = true;
+ if (lost_delete) {
+ struct stat st;
+ int r = pg->store->stat(
+ pg->ch,
+ ghobject_t(soid, gen, shard),
+ &st,
+ true);
+ if (r != 0) {
+ // Object doesn't exist on this shard, skip the update
+ should_update = false;
+ }
+ }
+
+ if (should_update) {
+ if (omap_header) {
+ t->omap_setheader(
+ coll_t(spg),
+ ghobject_t(soid, gen, shard),
+ *(omap_header));
+ }
+
+ if (clear_omap) {
+ t->omap_clear(
+ coll_t(spg),
+ ghobject_t(soid, gen, shard));
+ }
+
+ for (auto &&up: omap_updates) {
+ switch (up.first) {
+ case OmapUpdateType::Remove:
+ t->omap_rmkeys(
+ coll_t(spg),
+ ghobject_t(soid, gen, shard),
+ up.second);
+ break;
+ case OmapUpdateType::Insert:
+ t->omap_setkeys(
+ coll_t(spg),
+ ghobject_t(soid, gen, shard),
+ up.second);
+ break;
+ case OmapUpdateType::RemoveRange:
+ t->omap_rmkeyrange(
+ coll_t(spg),
+ ghobject_t(soid, gen, shard),
+ up.second);
+ break;
+ }
+ }
+ }
+ }
+
+ // Only remove journal entry if generation is NO_GEN (object not deleted)
+ // If gen != NO_GEN, the object has been deleted and journal was already cleared
+ if (gen == ghobject_t::NO_GEN && pg->get_parent()->pgb_is_primary()) {
+ const ECOmapJournalEntry to_remove(
+ entry.version, clear_omap,
+ omap_header, omap_updates
+ );
+ pg->remove_ec_omap_journal_entry(soid, to_remove);
+ }
+ }
};
void PGBackend::rollforward(
ldpp_dout(dpp, 20) << __func__ << ": entry=" << entry << dendl;
if (!entry.can_rollback())
return;
- Trimmer trimmer(entry.soid, this, t);
+ Trimmer trimmer(this, t, entry);
entry.mod_desc.visit(&trimmer);
}
{
if (!entry.can_rollback())
return;
- Trimmer trimmer(entry.soid, this, t);
+ Trimmer trimmer(this, t, entry);
entry.mod_desc.visit(&trimmer);
}
#include "common/ostream_temp.h"
#include "Coroutines.h"
+
+class ECOmapJournalEntry;
+
namespace Scrub {
class Store;
}
virtual shard_id_map<bufferlist> ec_decode_acting_set(
const shard_id_map<bufferlist> &shard_map, int chunk_size) const = 0;
virtual ECUtil::stripe_info_t ec_get_sinfo() const = 0;
-
+ virtual bool remove_ec_omap_journal_entry(const hobject_t &hoid, const ECOmapJournalEntry &entry) {
+ return false; // Only EC uses ec_omap_journal
+ };
+ virtual std::pair<gen_t, bool> omap_get_generation(const hobject_t &hoid) {
+ return {0, false}; // Only EC uses ec_omap_journal
+ };
+ virtual void omap_trim_delete_from_journal(const hobject_t &hoid, const version_t version) {};
+ using OmapIterFunction = std::function<ObjectStore::omap_iter_ret_t(std::string_view, std::string_view)>;
+ virtual int omap_iterate(
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const ObjectStore::omap_iter_seek_t &start_from,
+ ///^ [in] where the iterator should point to at the beginning
+ const OmapIterFunction &f ///< [in] function to call for each key/value pair
+ ) = 0;
+ virtual int omap_get_values(
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const std::set<std::string> &keys, ///< [in] keys to get
+ std::map<std::string, ceph::buffer::list> *out ///< [out] returned key/values
+ ) = 0;
+ virtual int omap_get_header(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ bool allow_eio ///< [in] don't assert on eio
+ ) = 0;
+ virtual int omap_get(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value map
+ ) = 0;
+ virtual int omap_check_keys(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ const std::set<std::string> &keys, ///< [in] Keys to check
+ std::set<std::string> *out ///< [out] Subset of keys defined on oid
+ ) = 0;
private:
std::set<hobject_t> temp_contents;
public:
}
}
+ int omap_iterate(
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const ObjectStore::omap_iter_seek_t &start_from,
+ ///^ [in] where the iterator should point to at the beginning
+ const OmapIterFunction &f ///< [in] function to call for each key/value pair
+ ) override {
+ return store->omap_iterate(c_, oid, start_from, f);
+ }
+ int omap_get_values(
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const std::set<std::string> &keys, ///< [in] keys to get
+ std::map<std::string, ceph::buffer::list> *out ///< [out] returned key/values
+ ) override {
+ return store->omap_get_values(c_, oid, keys, out);
+ }
+ int omap_get_header(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ bool allow_eio ///< [in] don't assert on eio
+ ) override {
+ return store->omap_get_header(c_, oid, header, allow_eio);
+ }
+ int omap_get(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value map
+ ) override {
+ return store->omap_get(c_, oid, header, out);
+ }
+ int omap_check_keys(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ const std::set<std::string> &keys, ///< [in] Keys to check
+ std::set<std::string> *out ///< [out] Subset of keys defined on oid
+ ) override {
+ return store->omap_check_keys(c_, oid, keys, out);
+ }
+
int objects_read_sync(
const hobject_t &hoid,
uint64_t off,
FLAG_CRIMSON = 1<<18,
FLAG_EC_OPTIMIZATIONS = 1<<19, // enable optimizations, once enabled, cannot be disabled
FLAG_CLIENT_SPLIT_READS = 1<<20, // Optimized EC is permitted to do direct reads.
+ FLAG_OMAP = 1<<21, // Pool is permitted to perform OMAP operations
};
static const char *get_flag_name(uint64_t f) {
case FLAG_CRIMSON: return "crimson";
case FLAG_EC_OPTIMIZATIONS: return "ec_optimizations";
case FLAG_CLIENT_SPLIT_READS: return "split_reads";
+ case FLAG_OMAP: return "supports_omap";
default: return "???";
}
}
return FLAG_EC_OPTIMIZATIONS;
if (name == "split_reads")
return FLAG_CLIENT_SPLIT_READS;
+ if (name == "supports_omap")
+ return FLAG_OMAP;
return 0;
}
bool is_erasure() const { return get_type() == TYPE_ERASURE; }
bool supports_omap() const {
- return !(get_type() == TYPE_ERASURE);
+ return has_flag(FLAG_OMAP) || is_replicated();
}
bool requires_aligned_append() const {
public:
virtual void append(uint64_t old_offset) {}
virtual void setattrs(std::map<std::string, std::optional<ceph::buffer::list>> &attrs) {}
+ virtual void ec_omap(bool clear_omap, std::optional<ceph::buffer::list> omap_header,
+ std::vector<std::pair<OmapUpdateType, ceph::buffer::list>> &omap_updates) {}
virtual void rmobject(version_t old_version) {}
/**
* Used to support the unfound_lost_delete log event: if the stashed
CREATE = 4,
UPDATE_SNAPS = 5,
TRY_DELETE = 6,
- ROLLBACK_EXTENTS = 7
+ ROLLBACK_EXTENTS = 7,
+ EC_OMAP = 8
};
ObjectModDesc() : can_local_rollback(true), rollback_info_completed(false) {
bl.reassign_to_mempool(mempool::mempool_osd_pglog);
encode(old_attrs, bl);
ENCODE_FINISH(bl);
}
+ void ec_omap(bool clear_omap, std::optional<ceph::buffer::list> omap_header,
+ std::vector<std::pair<OmapUpdateType, ceph::buffer::list>> &omap_updates) {
+ if(!can_local_rollback) {
+ return;
+ }
+ ENCODE_START(1, 1, bl);
+ append_id(EC_OMAP);
+ encode(clear_omap, bl);
+ encode(omap_header, bl);
+ encode(omap_updates, bl);
+ ENCODE_FINISH(bl);
+ }
bool rmobject(version_t deletion_version) {
if (!can_local_rollback || rollback_info_completed) {
return false;
return {0, 0, 0};
}
+ using OmapIterFunction = std::function<ObjectStore::omap_iter_ret_t(std::string_view, std::string_view)>;
+ int omap_iterate(
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const ObjectStore::omap_iter_seek_t &start_from,
+ ///^ [in] where the iterator should point to at the beginning
+ const OmapIterFunction &f ///< [in] function to call for each key/value pair
+ ) override {
+ return 0;
+ }
+
+ int omap_get_values(
+ ObjectStore::CollectionHandle &c_, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ const std::set<std::string> &keys, ///< [in] keys to get
+ std::map<std::string, ceph::buffer::list> *out ///< [out] returned key/values
+ ) override {
+ return 0;
+ }
+
+ int omap_get_header(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ bool allow_eio ///< [in] don't assert on eio
+ ) override {
+ return 0;
+ }
+
+ int omap_get(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ ceph::buffer::list *header, ///< [out] omap header
+ std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value map
+ ) override {
+ return 0;
+ }
+
+ int omap_check_keys(
+ ObjectStore::CollectionHandle &c_, ///< [in] Collection containing oid
+ const ghobject_t &oid, ///< [in] Object containing omap
+ const std::set<std::string> &keys, ///< [in] Keys to check
+ std::set<std::string> *out ///< [out] Subset of keys defined on oid
+ ) override {
+ return 0;
+ }
+
// Transaction submission
void submit_transaction(
const hobject_t &hoid,