From 4f8a631f8589e48cdcf78f433aeb6b2583050daf Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Sat, 12 Oct 2024 07:39:34 +0000 Subject: [PATCH] os, osd: introduce a lightweight OMAP iteration Signed-off-by: Radoslaw Zarzynski (cherry picked from commit c7407549e5a2c19949cc270c064f8bd55a52b370) --- src/os/ObjectStore.h | 45 +++++++++++++++++++++++++++++++++++++ src/osd/PrimaryLogPG.cc | 49 +++++++++++++++++++++++------------------ 2 files changed, 73 insertions(+), 21 deletions(-) diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h index eb9b8c20e00..3868d51a655 100644 --- a/src/os/ObjectStore.h +++ b/src/os/ObjectStore.h @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -762,6 +763,50 @@ public: const ghobject_t &oid ///< [in] object ) = 0; + struct omap_iter_seek_t { + std::string seek_position; + enum { + // start with provided key (seek_position), if it exists + LOWER_BOUND, + // skip provided key (seek_position) even if it exists + UPPER_BOUND + } seek_type = LOWER_BOUND; + static omap_iter_seek_t min_lower_bound() { return {}; } + }; + enum class omap_iter_ret_t { + STOP, + NEXT + }; + /** + * Iterate over object map with user-provided callable + * + * Warning! The callable is executed under lock on bluestore + * operations in c. Do not use bluestore methods on c while + * iterating. (Filling in a transaction is no problem). + * + * @param c collection + * @param oid object + * @param start_from where the iterator should point to at + * the beginning + * @param visitor callable that takes OMAP key and corresponding + * value as string_views and controls iteration + * by the return. It is executed for every object's + * OMAP entry from `start_from` till end of the + * object's OMAP or till the iteration is stopped + * by `STOP`. Please note that if there is no such + * entry, `visitor` will be called 0 times. + * @return error code, zero on success + */ + virtual int omap_iterate( + CollectionHandle &c, + const ghobject_t &oid, + omap_iter_seek_t start_from, + std::function visitor + ) { + return -EOPNOTSUPP; + } + virtual int flush_journal() { return -EOPNOTSUPP; } virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; } diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 6b9010177f4..9c47a3745af 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -7705,27 +7705,34 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) bool truncated = false; bufferlist bl; if (oi.is_omap()) { - ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator( - ch, ghobject_t(soid) - ); - if (!iter) { - result = -ENOENT; - goto fail; - } - iter->upper_bound(start_after); - if (filter_prefix > start_after) iter->lower_bound(filter_prefix); - for (num = 0; - iter->valid() && - iter->key().substr(0, filter_prefix.size()) == filter_prefix; - ++num, iter->next()) { - dout(20) << "Found key " << iter->key() << dendl; - if (num >= max_return || - bl.length() >= cct->_conf->osd_max_omap_bytes_per_request) { - truncated = true; - break; - } - encode(iter->key(), bl); - encode(iter->value(), bl); + using omap_iter_seek_t = ObjectStore::omap_iter_seek_t; + result = osd->store->omap_iterate( + ch, ghobject_t(soid), + // try to seek as many keys-at-once as possible for the sake of performance. + // note complexity should be logarithmic, so seek(n/2) + seek(n/2) is worse + // than just seek(n). + ObjectStore::omap_iter_seek_t{ + .seek_position = std::max(start_after, filter_prefix), + .seek_type = filter_prefix > start_after ? omap_iter_seek_t::LOWER_BOUND + : omap_iter_seek_t::UPPER_BOUND + }, + [&bl, &truncated, &filter_prefix, &num, max_return, + max_bytes=cct->_conf->osd_max_omap_bytes_per_request] + (std::string_view key, std::string_view value) mutable { + if (key.substr(0, filter_prefix.size()) != filter_prefix) { + return ObjectStore::omap_iter_ret_t::STOP; + } + if (num >= max_return || bl.length() >= max_bytes) { + truncated = true; + return ObjectStore::omap_iter_ret_t::STOP; + } + encode(key, bl); + encode(value, bl); + ++num; + return ObjectStore::omap_iter_ret_t::NEXT; + }); + if (result < 0) { + goto fail; } } // else return empty out_set encode(num, osd_op.outdata); -- 2.39.5