From: Pritha Srivastava Date: Thu, 5 Sep 2024 06:31:11 +0000 (+0530) Subject: rgw/d4n: handling special characters in object names such that X-Git-Tag: v20.3.0~8^2~21 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bdb32532956edf240a2c11730c1ce0dfacf20616;p=ceph.git rgw/d4n: handling special characters in object names such that parsing file names while restoring data from cache does not result in errors. 1. Introduce a new delimiter # instead of _ for concatening bucket_id, objectname etc to create a key, because "_" is a valid char in object names and is not encoded and results in incorrect values while parsing filenames for restorin data. 2. url encode the bucket id, version and object name before all are concatenated to form a key 3. url_decode the bucket id, version and object name after parsing the filename from cache while restoring data back. Signed-off-by: Pritha Srivastava --- diff --git a/src/rgw/driver/d4n/d4n_policy.cc b/src/rgw/driver/d4n/d4n_policy.cc index 0f5edc2bfb0c..4fd195e398f3 100644 --- a/src/rgw/driver/d4n/d4n_policy.cc +++ b/src/rgw/driver/d4n/d4n_policy.cc @@ -369,7 +369,7 @@ void LFUDAPolicy::update(const DoutPrefixProvider* dpp, const std::string& key, std::string oid_in_cache = key; if (dirty == true) { - oid_in_cache = "D_" + key; + oid_in_cache = DIRTY_BLOCK_PREFIX + key; } if (!restore_val.empty()) { @@ -513,13 +513,13 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp) erase_dirty_object(dpp, e->key, null_yield); } - std::string prefix = e->bucket_id + "_" + e->version + "_" + c_obj->get_name(); + std::string prefix = url_encode(e->bucket_id) + CACHE_DELIM + url_encode(e->version) + CACHE_DELIM + url_encode(c_obj->get_name()); off_t lst = e->size; off_t fst = 0; off_t ofs = 0; rgw::sal::DataProcessor* filter = processor.get(); - std::string head_oid_in_cache = "D_" + prefix; + std::string head_oid_in_cache = DIRTY_BLOCK_PREFIX + prefix; std::string new_head_oid_in_cache = prefix; ldpp_dout(dpp, 10) << __func__ << "(): head_oid_in_cache=" << head_oid_in_cache << dendl; ldpp_dout(dpp, 10) << __func__ << "(): new_head_oid_in_cache=" << new_head_oid_in_cache << dendl; @@ -537,7 +537,7 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp) } off_t cur_size = std::min(fst + dpp->get_cct()->_conf->rgw_max_chunk_size, lst); off_t cur_len = cur_size - fst; - std::string oid_in_cache = "D_" + prefix + "_" + std::to_string(fst) + "_" + std::to_string(cur_len); + std::string oid_in_cache = DIRTY_BLOCK_PREFIX + prefix + CACHE_DELIM + std::to_string(fst) + CACHE_DELIM + std::to_string(cur_len); ldpp_dout(dpp, 10) << __func__ << "(): oid_in_cache=" << oid_in_cache << dendl; rgw::sal::Attrs attrs; cacheDriver->get(dpp, oid_in_cache, 0, cur_len, data, attrs, null_yield); @@ -583,9 +583,9 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp) off_t cur_size = std::min(fst + dpp->get_cct()->_conf->rgw_max_chunk_size, lst); off_t cur_len = cur_size - fst; - std::string oid_in_cache = "D_" + prefix + "_" + std::to_string(fst) + "_" + std::to_string(cur_len); + std::string oid_in_cache = DIRTY_BLOCK_PREFIX + prefix + CACHE_DELIM + std::to_string(fst) + CACHE_DELIM + std::to_string(cur_len); ldpp_dout(dpp, 20) << __func__ << "(): oid_in_cache =" << oid_in_cache << dendl; - std::string new_oid_in_cache = prefix + "_" + std::to_string(fst) + "_" + std::to_string(cur_len); + std::string new_oid_in_cache = prefix + CACHE_DELIM + std::to_string(fst) + CACHE_DELIM + std::to_string(cur_len); //Rename block to remove "D" prefix cacheDriver->rename(dpp, oid_in_cache, new_oid_in_cache, null_yield); //Update in-memory data structure for each block diff --git a/src/rgw/driver/d4n/rgw_sal_d4n.cc b/src/rgw/driver/d4n/rgw_sal_d4n.cc index 28ea81972f12..b04e9ae4a02c 100644 --- a/src/rgw/driver/d4n/rgw_sal_d4n.cc +++ b/src/rgw/driver/d4n/rgw_sal_d4n.cc @@ -286,7 +286,7 @@ int D4NFilterObject::copy_object(const ACLOwner& owner, std::string key = get_cache_block_prefix(dest_object, dest_version, false); std::string head_oid_in_cache; if (dirty) { - head_oid_in_cache = std::format("{}{}","D_", key); //same as key, as there is no len or offset attached to head oid in cache + head_oid_in_cache = std::format("{}{}",DIRTY_BLOCK_PREFIX, key); //same as key, as there is no len or offset attached to head oid in cache } else { head_oid_in_cache = key; } @@ -311,8 +311,7 @@ int D4NFilterObject::copy_object(const ACLOwner& owner, return ret; } if (dirty) { - std::string object_key = dest_object->get_bucket()->get_bucket_id() + "_" + dest_object->get_oid(); - driver->get_policy_driver()->get_cache_policy()->update_dirty_object(dpp, object_key, dest_version, true, this->get_size(), creationTime, std::get(dest_object->get_bucket()->get_owner()), *etag, dest_object->get_bucket()->get_name(), dest_object->get_bucket()->get_bucket_id(), dest_object->get_key(), y); + driver->get_policy_driver()->get_cache_policy()->update_dirty_object(dpp, key, dest_version, true, this->get_size(), creationTime, std::get(dest_object->get_bucket()->get_owner()), *etag, dest_object->get_bucket()->get_name(), dest_object->get_bucket()->get_bucket_id(), dest_object->get_key(), y); } } } @@ -681,7 +680,7 @@ int D4NFilterObject::delete_data_block_cache_entries(const DoutPrefixProvider* d std::string key = get_key_in_cache(get_cache_block_prefix(this, version, false), std::to_string(fst), std::to_string(cur_len)); std::string key_in_cache; if (dirty) { - key_in_cache = std::format("{}{}","D_", key); + key_in_cache = std::format("{}{}",DIRTY_BLOCK_PREFIX, key); } else { key_in_cache = key; } @@ -1154,9 +1153,8 @@ int D4NFilterObject::D4NFilterReadOp::flush(const DoutPrefixProvider* dpp, rgw:: dest_block.cacheObj.hostsList.insert(dpp->get_cct()->_conf->rgw_d4n_l1_datacache_address); dest_block.version = dest_version; dest_block.cacheObj.dirty = true; - std::string prefix = get_cache_block_prefix(source->dest_object, dest_version, false); std::string key = get_key_in_cache(get_cache_block_prefix(source->dest_object, dest_version, false), std::to_string(ofs), std::to_string(len)); - std::string dest_oid_in_cache = std::format("{}{}","D_", key); + std::string dest_oid_in_cache = std::format("{}{}",DIRTY_BLOCK_PREFIX, key); auto ret = source->driver->get_policy_driver()->get_cache_policy()->eviction(dpp, dest_block.size, y); if (ret == 0) { rgw::sal::Attrs attrs; @@ -1266,7 +1264,7 @@ int D4NFilterObject::D4NFilterReadOp::iterate(const DoutPrefixProvider* dpp, int ldpp_dout(dpp, 20) << "D4NFilterObject::iterate:: " << __func__ << "(): READ FROM CACHE: block is dirty = " << block.cacheObj.dirty << dendl; if (block.cacheObj.dirty == true) { - key = "D_" + oid_in_cache; // we keep track of dirty data in the cache for the metadata failure case + key = DIRTY_BLOCK_PREFIX + oid_in_cache; // we keep track of dirty data in the cache for the metadata failure case ldpp_dout(dpp, 20) << "D4NFilterObject::iterate:: " << __func__ << "(): READ FROM CACHE: key=" << key << " data is Dirty." << dendl; } @@ -1359,7 +1357,7 @@ int D4NFilterObject::D4NFilterReadOp::iterate(const DoutPrefixProvider* dpp, int if ((part_len != chunk_size) && source->driver->get_policy_driver()->get_cache_policy()->exist_key(oid_in_cache) > 0) { // Read From Cache if (block.cacheObj.dirty == true){ - key = "D_" + oid_in_cache; //we keep track of dirty data in the cache for the metadata failure case + key = DIRTY_BLOCK_PREFIX + oid_in_cache; } ldpp_dout(dpp, 20) << "D4NFilterObject::iterate:: " << __func__ << "(): " << __LINE__ << ": READ FROM CACHE: block dirty =" << block.cacheObj.dirty << dendl; @@ -1748,7 +1746,7 @@ int D4NFilterObject::D4NFilterReadOp::D4NFilterGetCB::handle_data(bufferlist& bl bl_rem.claim_append(bl_copy); if (bl_rem.length() == rgw_max_chunk_size) { - std::string oid = get_key_in_cache(prefix, std::to_string(adjusted_start_ofs), std::to_string(bl_rem.length())); + std::string oid = prefix + CACHE_DELIM + std::to_string(adjusted_start_ofs) + CACHE_DELIM + std::to_string(bl_rem.length()); if (!filter->get_policy_driver()->get_cache_policy()->exist_key(oid)) { block.blockID = adjusted_start_ofs; block.size = bl_rem.length(); @@ -1787,7 +1785,7 @@ int D4NFilterObject::D4NFilterReadOp::D4NFilterGetCB::handle_data(bufferlist& bl if (source->dest_object && source->dest_bucket) { D4NFilterObject* d4n_dest_object = dynamic_cast(source->dest_object); std::string dest_version = d4n_dest_object->get_object_version(); - std::string dest_oid = get_key_in_cache(dest_prefix, std::to_string(adjusted_start_ofs), std::to_string(bl_rem.length())); + std::string dest_oid = dest_prefix + CACHE_DELIM + std::to_string(adjusted_start_ofs) + CACHE_DELIM + std::to_string(bl_rem.length()); dest_block.blockID = adjusted_start_ofs; dest_block.size = bl_rem.length(); auto ret = filter->get_policy_driver()->get_cache_policy()->eviction(dpp, dest_block.size, *y); @@ -1967,7 +1965,7 @@ int D4NFilterObject::D4NFilterDeleteOp::delete_obj(const DoutPrefixProvider* dpp } if (block.cacheObj.dirty) - prefix = "D_" + prefix; + prefix = DIRTY_BLOCK_PREFIX + prefix; std::string oid_in_cache = get_key_in_cache(prefix, std::to_string(fst), std::to_string(cur_len)); @@ -1994,8 +1992,8 @@ int D4NFilterObject::D4NFilterDeleteOp::delete_obj(const DoutPrefixProvider* dpp if (!objDirty) { // object written to backend return next->delete_obj(dpp, y, flags); } else { - std::string object_key = source->get_bucket()->get_bucket_id() + "_" + source->get_oid(); - if (!(ret = source->driver->get_policy_driver()->get_cache_policy()->erase_dirty_object(dpp, object_key, y))) { + std::string key = policy_prefix; + if (!(ret = source->driver->get_policy_driver()->get_cache_policy()->erase_dirty_object(dpp, key, y))) { ldpp_dout(dpp, 0) << "Failed to delete policy object entry for: " << source->get_name() << ", ret=" << ret << dendl; return -ENOENT; } else { @@ -2087,9 +2085,9 @@ int D4NFilterWriter::process(bufferlist&& data, uint64_t offset) return next->process(std::move(data), offset); } else { rgw::sal::Attrs attrs; - std::string oid = prefix + "_" + std::to_string(ofs); - std::string key = "D_" + oid + "_" + std::to_string(bl_len); - std::string oid_in_cache = oid + "_" + std::to_string(bl_len); + std::string oid = prefix + CACHE_DELIM + std::to_string(ofs); + std::string key = DIRTY_BLOCK_PREFIX + oid + CACHE_DELIM + std::to_string(bl_len); + std::string oid_in_cache = oid + CACHE_DELIM + std::to_string(bl_len); dirty = true; ret = driver->get_policy_driver()->get_cache_policy()->eviction(dpp, bl.length(), y); if (ret == 0) { @@ -2234,7 +2232,7 @@ int D4NFilterWriter::complete(size_t accounted_size, const std::string& etag, std::string head_oid_in_cache; //same as key, as there is no len or offset attached to head oid in cache if (dirty) { - head_oid_in_cache = std::format("{}{}","D_", key);; + head_oid_in_cache = std::format("{}{}",DIRTY_BLOCK_PREFIX, key);; } else { head_oid_in_cache = key; } @@ -2259,11 +2257,9 @@ int D4NFilterWriter::complete(size_t accounted_size, const std::string& etag, return ret; } if (dirty) { - //using object oid here so that version is automatically picked for versioned buckets, and for non-versioned buckets the old version is replaced by the latest version - std::string object_key = obj->get_bucket()->get_bucket_id() + "_" + obj->get_oid(); auto creationTime = ceph::real_clock::to_time_t(object->get_mtime()); - ldpp_dout(dpp, 16) << "D4NFilterWriter::" << __func__ << "(): object_key=" << object_key << dendl; - driver->get_policy_driver()->get_cache_policy()->update_dirty_object(dpp, object_key, version, dirty, accounted_size, creationTime, std::get(obj->get_bucket()->get_owner()), objEtag, obj->get_bucket()->get_name(), obj->get_bucket()->get_bucket_id(), obj->get_key(), y); + ldpp_dout(dpp, 16) << "D4NFilterWriter::" << __func__ << "(): key=" << key << dendl; + driver->get_policy_driver()->get_cache_policy()->update_dirty_object(dpp, key, version, dirty, accounted_size, creationTime, std::get(obj->get_bucket()->get_owner()), objEtag, obj->get_bucket()->get_name(), obj->get_bucket()->get_bucket_id(), obj->get_key(), y); } } else { //if get_cache_driver()->put() ldpp_dout(dpp, 0) << "D4NFilterWriter::" << __func__ << "(): put failed for head_oid_in_cache, ret=" << ret << dendl; diff --git a/src/rgw/driver/d4n/rgw_sal_d4n.h b/src/rgw/driver/d4n/rgw_sal_d4n.h index 48e364a66acd..005b43331044 100644 --- a/src/rgw/driver/d4n/rgw_sal_d4n.h +++ b/src/rgw/driver/d4n/rgw_sal_d4n.h @@ -42,17 +42,15 @@ namespace rgw { namespace sal { inline std::string get_cache_block_prefix(rgw::sal::Object* object, const std::string& version, bool is_dirty) { if (is_dirty) { - //return "D_" + object->get_bucket()->get_bucket_id() + "_" + version + "_" + object->get_name(); - return fmt::format("{}{}{}{}{}{}", "D_", object->get_bucket()->get_bucket_id(), "_", version, "_", object->get_name()); + return fmt::format("{}{}{}{}{}{}", DIRTY_BLOCK_PREFIX, object->get_bucket()->get_bucket_id(), CACHE_DELIM, version, CACHE_DELIM, object->get_name()); } else { - //return object->get_bucket()->get_bucket_id() + "_" + version + "_" + object->get_name(); - return fmt::format("{}{}{}{}{}", object->get_bucket()->get_bucket_id(), "_", version, "_", object->get_name()); + return fmt::format("{}{}{}{}{}", object->get_bucket()->get_bucket_id(), CACHE_DELIM, version, CACHE_DELIM, object->get_name()); } } inline std::string get_key_in_cache(const std::string& prefix, const std::string& offset, const std::string& len) { - return fmt::format("{}{}{}{}{}", prefix, "_", offset, "_", len); + return fmt::format("{}{}{}{}{}", prefix, CACHE_DELIM, offset, CACHE_DELIM, len); } using boost::redis::connection; diff --git a/src/rgw/rgw_cache_driver.h b/src/rgw/rgw_cache_driver.h index 8c3491666f36..c3055be71a86 100644 --- a/src/rgw/rgw_cache_driver.h +++ b/src/rgw/rgw_cache_driver.h @@ -14,6 +14,9 @@ constexpr char RGW_CACHE_ATTR_VERSION_ID[] = "user.rgw.version_id"; constexpr char RGW_CACHE_ATTR_SOURC_ZONE[] = "user.rgw.source_zone"; constexpr char RGW_CACHE_ATTR_LOCAL_WEIGHT[] = "user.rgw.localWeight"; +constexpr char DIRTY_BLOCK_PREFIX[] = "D#"; +constexpr char CACHE_DELIM = '#'; + namespace rgw { namespace cache { typedef std::function