From d771f0540659efb1147038d9e4739043ec375f7b Mon Sep 17 00:00:00 2001 From: Mark Kogan Date: Wed, 26 Mar 2025 11:06:42 +0000 Subject: [PATCH] rgw/d3n: store cached objects using hash digest MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit storing S3 objects under deep directory hierarchy resulted in a long path which after URL encoding resulted in a long cached filename that is longer than the POSIX max of 256 chars manifesting as writes to D3N cache failing with: ``` ❯ errno 36 ENAMETOOLONG 36 File name too long ``` Fixes: https://tracker.ceph.com/issues/70333 Signed-off-by: Mark Kogan (cherry picked from commit 5bfd0b7100d32bcf793c26b007b1403db8e0a42f) --- qa/workunits/rgw/test_rgw_datacache.py | 14 +++++--- src/rgw/driver/rados/rgw_d3n_datacache.cc | 41 ++++++++++++----------- src/rgw/rgw_d3n_cacherequest.h | 10 +++++- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/qa/workunits/rgw/test_rgw_datacache.py b/qa/workunits/rgw/test_rgw_datacache.py index 2af2a0d3aa3d6..426524ae8742d 100755 --- a/qa/workunits/rgw/test_rgw_datacache.py +++ b/qa/workunits/rgw/test_rgw_datacache.py @@ -184,19 +184,23 @@ def main(): # list the files in the cache dir for troubleshooting out = exec_cmd('ls -l %s' % (cache_dir)) # get name of cached object and check if it exists in the cache - out = exec_cmd('find %s -name "*%s1"' % (cache_dir, cached_object_name)) + out = exec_cmd('find %s -type f -name "*" | tail -1' % (cache_dir)) cached_object_path = get_cmd_output(out) log.debug("Path of file in datacache is: %s", cached_object_path) - out = exec_cmd('basename %s' % (cached_object_path)) - basename_cmd_out = get_cmd_output(out) - log.debug("Name of file in datacache is: %s", basename_cmd_out) + out = exec_cmd("sha1sum %s | awk '{ print $1 }'" % (cached_object_path)) + cached_object_sha1 = get_cmd_output(out) + log.debug("SHA1 of file in datacache is: %s", cached_object_sha1) # check to see if the cached object is in Ceph out = 
exec_cmd('rados ls -p default.rgw.buckets.data') rados_ls_out = get_cmd_output(out) log.debug("rados ls output is: %s", rados_ls_out) - assert(basename_cmd_out in rados_ls_out) + out = exec_cmd("dd status=none if=%s of=/dev/stdout bs=1M skip=4 | sha1sum | awk '{ print $1 }'" % (outfile)) + org_object_sha1 = get_cmd_output(out) + log.debug("SHA1 of original file is: %s", org_object_sha1) + + assert(cached_object_sha1 == org_object_sha1) # Datacache test failed if sha1 of cached object does not match original object sha1" log.debug("RGW Datacache test SUCCESS") # remove datacache dir diff --git a/src/rgw/driver/rados/rgw_d3n_datacache.cc b/src/rgw/driver/rados/rgw_d3n_datacache.cc index be1a44686969d..851a6cbaba9c7 100644 --- a/src/rgw/driver/rados/rgw_d3n_datacache.cc +++ b/src/rgw/driver/rados/rgw_d3n_datacache.cc @@ -25,9 +25,9 @@ namespace efs = std::experimental::filesystem; using namespace std; -int D3nCacheAioWriteRequest::d3n_libaio_prepare_write_op(bufferlist& bl, unsigned int len, string oid, string cache_location) +int D3nCacheAioWriteRequest::d3n_libaio_prepare_write_op(bufferlist& bl, unsigned int len, string digest_oid, string cache_location) { - std::string location = cache_location + url_encode(oid, true); + std::string location = cache_location + digest_oid; int r = 0; lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): Write To Cache, location=" << location << dendl; @@ -111,10 +111,10 @@ void D3nDataCache::init(CephContext *_cct) { #endif } -int D3nDataCache::d3n_io_write(bufferlist& bl, unsigned int len, std::string oid) +int D3nDataCache::d3n_io_write(bufferlist& bl, unsigned int len, std::string digest_oid) { D3nChunkDataInfo* chunk_info{nullptr}; - std::string location = cache_location + url_encode(oid, true); + std::string location = cache_location + digest_oid; int r = 0; lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): location=" << location << dendl; @@ -152,10 +152,10 @@ 
int D3nDataCache::d3n_io_write(bufferlist& bl, unsigned int len, std::string oid { // update cache_map entries for new chunk in cache const std::lock_guard l(d3n_cache_lock); chunk_info = new D3nChunkDataInfo; - chunk_info->oid = oid; + chunk_info->oid = digest_oid; chunk_info->set_ctx(cct); chunk_info->size = len; - d3n_cache_map.insert(pair(oid, chunk_info)); + d3n_cache_map.insert(pair(digest_oid, chunk_info)); } return r; @@ -229,21 +229,21 @@ void D3nDataCache::put(bufferlist& bl, unsigned int len, std::string& oid) { size_t sr = 0; uint64_t freed_size = 0, _free_data_cache_size = 0, _outstanding_write_size = 0; - - ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): oid=" << oid << ", len=" << len << dendl; + std::string digest_oid = D3nL1CacheRequest::generate_oid_digest(oid); + ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): oid=" << oid << ", digest_oid=" << digest_oid << ", len=" << len << dendl; { const std::lock_guard l(d3n_cache_lock); - std::unordered_map::iterator iter = d3n_cache_map.find(oid); + std::unordered_map::iterator iter = d3n_cache_map.find(digest_oid); if (iter != d3n_cache_map.end()) { ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): data already cached, no rewrite" << dendl; return; } - auto it = d3n_outstanding_write_list.find(oid); + auto it = d3n_outstanding_write_list.find(digest_oid); if (it != d3n_outstanding_write_list.end()) { ldout(cct, 10) << "D3nDataCache: NOTE: data put in cache already issued, no rewrite" << dendl; return; } - d3n_outstanding_write_list.insert(oid); + d3n_outstanding_write_list.insert(digest_oid); } { const std::lock_guard l(d3n_eviction_lock); @@ -263,17 +263,17 @@ void D3nDataCache::put(bufferlist& bl, unsigned int len, std::string& oid) } if (sr == 0) { ldout(cct, 2) << "D3nDataCache: Warning: eviction was not able to free disk space, not writing to cache" << dendl; - d3n_outstanding_write_list.erase(oid); + d3n_outstanding_write_list.erase(digest_oid); return; } ldout(cct, 20) << 
"D3nDataCache: completed eviction of " << sr << " bytes" << dendl; freed_size += sr; } int r = 0; - r = d3n_libaio_create_write_request(bl, len, oid); + r = d3n_libaio_create_write_request(bl, len, digest_oid); if (r < 0) { const std::lock_guard l(d3n_cache_lock); - d3n_outstanding_write_list.erase(oid); + d3n_outstanding_write_list.erase(digest_oid); ldout(cct, 1) << "D3nDataCache: create_aio_write_request fail, r=" << r << dendl; return; } @@ -287,10 +287,11 @@ bool D3nDataCache::get(const string& oid, const off_t len) { const std::lock_guard l(d3n_cache_lock); bool exist = false; - string location = cache_location + url_encode(oid, true); + std::string digest_oid = D3nL1CacheRequest::generate_oid_digest(oid); + string location = cache_location + digest_oid; - lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): location=" << location << dendl; - std::unordered_map::iterator iter = d3n_cache_map.find(oid); + lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): oid=" << oid << ", digest_oid=" << digest_oid << ", location=" << location << dendl; + std::unordered_map::iterator iter = d3n_cache_map.find(digest_oid); if (!(iter == d3n_cache_map.end())) { // check inside cache whether file exists or not!!!! 
then make exist true; struct D3nChunkDataInfo* chdo = iter->second; @@ -304,7 +305,7 @@ bool D3nDataCache::get(const string& oid, const off_t len) lru_remove(chdo); lru_insert_head(chdo); } else { - d3n_cache_map.erase(oid); + d3n_cache_map.erase(digest_oid); const std::lock_guard l(d3n_eviction_lock); lru_remove(chdo); delete chdo; @@ -341,7 +342,7 @@ size_t D3nDataCache::random_eviction() d3n_cache_map.erase(del_oid); // oid } - location = cache_location + url_encode(del_oid, true); + location = cache_location + del_oid; ::remove(location.c_str()); return freed_size; } @@ -377,7 +378,7 @@ size_t D3nDataCache::lru_eviction() } freed_size = del_entry->size; delete del_entry; - location = cache_location + url_encode(del_oid, true); + location = cache_location + del_oid; ::remove(location.c_str()); return freed_size; } diff --git a/src/rgw/rgw_d3n_cacherequest.h b/src/rgw/rgw_d3n_cacherequest.h index 54b495f5461f8..e862963fb6301 100644 --- a/src/rgw/rgw_d3n_cacherequest.h +++ b/src/rgw/rgw_d3n_cacherequest.h @@ -20,6 +20,8 @@ #include "rgw_aio.h" #include "rgw_cache.h" +#include "xxhash.h" + struct D3nGetObjData { std::mutex d3n_lock; @@ -137,12 +139,18 @@ struct D3nL1CacheRequest { } }; + static std::string generate_oid_digest(const std::string& oid) { + XXH128_hash_t hash = XXH3_128bits(oid.c_str(), oid.size()); + std::string digest = fmt::format("{:016x}{:016x}", hash.high64, hash.low64); + return std::string(digest); + } + void file_aio_read_abstract(const DoutPrefixProvider *dpp, boost::asio::yield_context yield, std::string& cache_location, off_t read_ofs, off_t read_len, rgw::Aio* aio, rgw::AioResult& r) { auto ex = yield.get_executor(); ldpp_dout(dpp, 20) << "D3nDataCache: " << __func__ << "(): oid=" << r.obj.oid << dendl; - async_read(dpp, ex, cache_location+"/"+url_encode(r.obj.oid, true), read_ofs, read_len, bind_executor(ex, d3n_libaio_handler{aio, r})); + async_read(dpp, ex, cache_location+"/"+generate_oid_digest(r.obj.oid), read_ofs, read_len, 
bind_executor(ex, d3n_libaio_handler{aio, r})); } }; -- 2.39.5