From: Marcel Lauhoff Date: Fri, 21 Feb 2025 11:42:07 +0000 (+0100) Subject: test: Add Cache benchmarks X-Git-Tag: v21.0.1~14^2~16 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8354a97001baec285b930a9ef1232684fbeb5656;p=ceph.git test: Add Cache benchmarks Add Google benchmark [0] based micro benchmarks for Cache/LRU implementations in the Ceph code base. [0] https://github.com/google/benchmark Signed-off-by: Marcel Lauhoff On-behalf-of: SAP marcel.lauhoff@sap.com --- diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index ecba4a139bc..0bfe9019db0 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -1083,7 +1083,17 @@ add_executable(unittest_ceph_assert ceph_assert.cc) add_ceph_unittest(unittest_ceph_assert) target_link_libraries(unittest_ceph_assert ceph-common global) -endif() +endif(NOT WIN32) + +find_package(benchmark QUIET) +if (benchmark_FOUND) + add_executable(bench_lrus + bench_lrus.cc + ) + target_link_libraries(bench_lrus ceph-common global-static benchmark::benchmark Boost::context) +else() + message(STATUS "The google/benchmark library was not found. Skipping micro benchmark tests") +endif(benchmark_FOUND) add_executable(test_nvmeof_gw_utils test_nvmeof_gw_utils.cc diff --git a/src/test/bench_lrus.cc b/src/test/bench_lrus.cc new file mode 100644 index 00000000000..02155220e63 --- /dev/null +++ b/src/test/bench_lrus.cc @@ -0,0 +1,461 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2025 Clyso GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/ceph_argparse.h" +#include "common/cohort_lru.h" +#include "common/shared_cache.hpp" +#include "common/simple_cache.hpp" +#include "common/web_cache.h" +#include "global/global_context.h" +#include "global/global_init.h" +#include "include/uuid.h" + + +// Cache implementations in the Ceph codebase: +// ✅ cohort lru +// ✅ shared_cache.hpp SharedLRU +// ✅ simple_cache SimpleLRU +// ✅ web_cache +// Not benchmarked here (reason): +// ❌ LRUSet (not concurrent) +// ❌ intrusive_lru: lru implementation with embedded map and list hook (not concurrent) +// ❌ include/lru.h LRU - (not concurrent) + + +// Workload Generator Helper +// +// (1) insert unique items > cache size +// - exercises cache replacement algorithm +// - with > 1 thread - test concurrency +// (2) Inserts using pareto distributed keys +// - approximates real world workload + +namespace { + +// Config + +constexpr size_t SMALL_CACHE = 100; +constexpr size_t LARGE_CACHE = 1000; +constexpr size_t CACHE_OP_COUNT = 1000000; +constexpr size_t THREADS_SINGLE = 1; +constexpr size_t THREADS_LOTS = 128; +constexpr size_t RAND_VALUE_LEN = 32; +constexpr size_t PARETO_KEY_POOL_SIZE = 1000; + +std::string random_key() { + uuid_d uuid; + uuid.generate_random(); + return uuid.to_string(); +} + +std::string random_value() { + std::string result(RAND_VALUE_LEN, '0'); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dist(0, 0xFF); + for (size_t i=0; i< RAND_VALUE_LEN; ++i) { + result[i] = static_cast(dist(gen)); + } + return result; +} + +std::vector key_pool(size_t len) { + std::vector result; + result.reserve(len); + for (size_t i = 0; i < len; ++i) { + result.push_back(random_key()); + } + return result; +} + +std::vector workload( + const std::vector& pool, size_t length) { + const double alpha = 1.5; + std::vector weights(pool.size()); + for (size_t i = 0; i < pool.size(); ++i) { + weights[i] = std::pow(static_cast(i + 1), -alpha); + } + const double weights_sum = + std::accumulate(weights.begin(), weights.end(), 0.0); + for (auto& weight : weights) { + weight /= weights_sum; + } + std::vector partial_sums(weights.size()); + std::partial_sum(weights.begin(), weights.end(), partial_sums.begin()); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(0.0, 1.0); + + std::vector result; + result.reserve(length); + + for (size_t i = 0; i < length; ++i) { + double u = dis(gen); + auto it = std::ranges::lower_bound(partial_sums, u); + size_t idx = std::distance(partial_sums.begin(), it); + result.push_back(pool[idx]); + } + return result; +} + +// +// Cache Adapter (cache impl <-> benchmark) +// + +struct CacheAdapter { + CacheAdapter() = default; + CacheAdapter(const CacheAdapter&) = default; + CacheAdapter(CacheAdapter&&) = delete; + CacheAdapter& operator=(const CacheAdapter&) = delete; + CacheAdapter& operator=(CacheAdapter&&) = delete; + virtual ~CacheAdapter() = default; + + // Simulate common cache operation: lookup value by key, if it isn't + // cached add it + virtual void cache(const std::string& key, const std::string& value) = 0; + + virtual double hit_miss_ratio() { return -23.42; }; + virtual bool reset() { return false; }; +}; + +// Shared LRU {{{ +struct SharedLRUAdapter : public CacheAdapter { + SharedLRU _cache; + + std::atomic_int hits; + std::atomic_int misses; + explicit SharedLRUAdapter(size_t size) : _cache(g_ceph_context, size) {} + void cache(const std::string& key, const std::string& value) override { + bool existed = false; + auto* copy = new std::string(value); + const auto ptr = _cache.add(key, copy, &existed); + if (existed) { + hits++; + delete copy; + } else { + misses++; + } + } + + double hit_miss_ratio() override { + return static_cast(hits) / + (static_cast(hits) + static_cast(misses)); + } + + bool reset() override { + _cache.clear(); + hits = 0; + misses = 0; + return true; + } +}; + +// }}} + +// Simple LRU {{{ +struct SimpleLRUAdapter : public CacheAdapter { + SimpleLRU _cache; + + explicit SimpleLRUAdapter(size_t size) : _cache(size) {} + void cache(const std::string& key, const std::string& value) override { + std::string out; + if (!_cache.lookup(key, &out)) { + _cache.add(key, value); + } + } +}; + +// }}} + +// Cohort LRU {{{ +namespace cohortlru { +class Factory; + +struct Object : public cohort::lru::Object { + std::string m_key; + std::string m_value; + + Object(const Object&) = delete; + Object(Object&&) = delete; + Object& operator=(const Object&) = delete; + Object& operator=(Object&&) = delete; + ~Object() override = default; + + Object(const std::string& key, const std::string& value) : + cohort::lru::Object(), m_key(key), m_value(value) + {} + + bool reclaim(const cohort::lru::ObjectFactory* newobj_fac) override; + +}; + +struct Factory : public cohort::lru::ObjectFactory { + std::string m_key; + std::string m_value; + + Factory(const Factory&) = default; + Factory(Factory&&) = delete; + Factory& operator=(const Factory&) = default; + Factory& operator=(Factory&&) = delete; + ~Factory() override = default; + + Factory(const std::string& key, const std::string& value) : + cohort::lru::ObjectFactory(), m_key(key), m_value(value) + {} + + cohort::lru::Object* alloc() override { return new Object(m_key, m_value); } + + void recycle(cohort::lru::Object* o) override { + auto oo = dynamic_cast(o); + oo->m_key = m_key; + oo->m_value = m_value; + } +}; + +bool Object::reclaim(const cohort::lru::ObjectFactory* newobj_fac) { + const auto* factory = dynamic_cast(newobj_fac); + return (factory != nullptr); +} + +} // namespace cohortlru + +struct CohortLRUAdapter : public CacheAdapter { + cohort::lru::LRU _cache; + + explicit CohortLRUAdapter(size_t size) + : _cache( + static_cast(size / std::thread::hardware_concurrency()), + size / std::thread::hardware_concurrency()) {} + + void cache(const std::string& key, const std::string& value) override { + cohortlru::Factory prototype(key, value); + uint32_t iflags{cohort::lru::FLAG_INITIAL}; + auto o = dynamic_cast( + _cache.insert(&prototype, cohort::lru::Edge::MRU, iflags)); + ceph_assert(o != nullptr); + ceph_assert(o->m_key == key); + ceph_assert(o->m_value == value); + } +}; + +// }}} + +// Web Cache {{{ + +struct WebCacheAdapter : public CacheAdapter { + using CacheValue = std::string; + using Cache = webcache::WebCache; + Cache _cache; + + WebCacheAdapter(const WebCacheAdapter&) = delete; + WebCacheAdapter(WebCacheAdapter&&) = delete; + WebCacheAdapter& operator=(const WebCacheAdapter&) = delete; + WebCacheAdapter& operator=(WebCacheAdapter&&) = delete; + + explicit WebCacheAdapter(size_t size) : + _cache(g_ceph_context, "benchmark", size) + {} + void cache(const std::string& key, const std::string& value) override { + if (!_cache.lookup(key).has_value()) { + _cache.add(key, std::make_shared(value)); + } + } + + ~WebCacheAdapter() override { _cache.perf()->reset(); } + + double hit_miss_ratio() override { + return static_cast( + _cache.perf()->get(static_cast(webcache::Metric::hit))) / + (static_cast( + _cache.perf()->get(static_cast(webcache::Metric::hit))) + + static_cast( + _cache.perf()->get(static_cast(webcache::Metric::miss)))); + } + + bool reset() override { + _cache.clear(); + return true; + } +}; + +struct WebCacheLookupOrAdapter : public CacheAdapter { + struct CacheValue { + std::once_flag once; + std::string value; + }; + using Cache = webcache::WebCache; + Cache _cache; + + WebCacheLookupOrAdapter(const WebCacheLookupOrAdapter&) = delete; + WebCacheLookupOrAdapter(WebCacheLookupOrAdapter&&) = delete; + WebCacheLookupOrAdapter& operator=(const WebCacheLookupOrAdapter&) = delete; + WebCacheLookupOrAdapter& operator=(WebCacheLookupOrAdapter&&) = delete; + + explicit WebCacheLookupOrAdapter(size_t size) : + _cache(g_ceph_context, "benchmark", size) + {} + + ~WebCacheLookupOrAdapter() override { + _cache.perf()->reset(); + } + + void cache(const std::string& key, const std::string& value) override { + std::shared_ptr cache_value = + _cache.lookup_or(key, std::make_shared()); + + std::call_once(cache_value->once, [&]() { cache_value->value = value; }); + } + double hit_miss_ratio() override { + return static_cast( + _cache.perf()->get(static_cast(webcache::Metric::hit))) / + (static_cast( + _cache.perf()->get(static_cast(webcache::Metric::hit))) + + static_cast( + _cache.perf()->get(static_cast(webcache::Metric::miss)))); + } + bool reset() override { + _cache.clear(); + return true; + } +}; + +/// }}} + +// Benchmarks {{{ + +template +class CacheFixture : public benchmark::Fixture { + public: + std::unique_ptr cache; + void SetUp(::benchmark::State& state) override { + if (state.thread_index() == 0) { + cache = std::make_unique(state.range(0)); + } + } + + void TearDown(::benchmark::State& state) override { + if (state.thread_index() == 0) { + state.counters["hit/miss"] = cache->hit_miss_ratio(); + } + } +}; + +template +class ParetoFixture : public CacheFixture { + public: + std::vector pool; + std::array, THREADS_LOTS> pareto_keys; + + ParetoFixture() : pool(key_pool(PARETO_KEY_POOL_SIZE)) {} + + void SetUp(::benchmark::State& state) override { + if (state.thread_index() == 0) { + this->cache = std::make_unique(state.range(0)); + state.counters["Key Pool"] = pool.size(); + } + pareto_keys[state.thread_index()] = + workload(pool, state.range(1) / state.threads()); + state.counters["Keys/Thread"] = benchmark::Counter( + pareto_keys[state.thread_index()].size(), + benchmark::Counter::kAvgThreads); + ceph_assert(pareto_keys[state.thread_index()].size() > 1000); + } +}; + +BENCHMARK_TEMPLATE_METHOD_F(CacheFixture, BM_UniqueAdd)( + benchmark::State& state) { + for (auto _ : state) { + const size_t ops = state.range(1) / state.threads(); + for (size_t i = 0; i < ops; ++i) { + this->cache->cache(random_key(), random_value()); + } + state.counters["KeysProcessed"] = + benchmark::Counter(ops, benchmark::Counter::kIsRate); + state.counters["KeysProcessedInv"] = benchmark::Counter( + ops, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); + } +} + +BENCHMARK_TEMPLATE_METHOD_F(ParetoFixture, BM_Pareto)(benchmark::State& state) { + for (auto _ : state) { + const auto keys = this->pareto_keys[state.thread_index()]; + for (auto key : keys) { + this->cache->cache(std::string(key), "some_value"); + } + state.counters["KeysProcessed"] = + benchmark::Counter(keys.size(), benchmark::Counter::kIsRate); + state.counters["KeysProcessedInv"] = benchmark::Counter( + keys.size(), benchmark::Counter::kIsRate | benchmark::Counter::kInvert); + } +} + +// }}} + +// Benchmark Run Configuration {{{ + +void DefaultArgs(benchmark::internal::Benchmark* bench) { + bench->Args({SMALL_CACHE, CACHE_OP_COUNT}) + ->Args({LARGE_CACHE, CACHE_OP_COUNT}) + ->Threads(THREADS_SINGLE) + ->Threads(static_cast(std::thread::hardware_concurrency())) + ->Threads(THREADS_LOTS); +} + +BENCHMARK_TEMPLATE_INSTANTIATE_F(CacheFixture, BM_UniqueAdd, SharedLRUAdapter) ->Name("UNIQUE shared")->Apply(DefaultArgs); +BENCHMARK_TEMPLATE_INSTANTIATE_F(CacheFixture, BM_UniqueAdd, SimpleLRUAdapter) ->Name("UNIQUE simple")->Apply(DefaultArgs); +BENCHMARK_TEMPLATE_INSTANTIATE_F(CacheFixture, BM_UniqueAdd, CohortLRUAdapter) ->Name("UNIQUE cohort")->Apply(DefaultArgs); +BENCHMARK_TEMPLATE_INSTANTIATE_F(CacheFixture, BM_UniqueAdd, WebCacheAdapter) ->Name("UNIQUE web ")->Apply(DefaultArgs); +BENCHMARK_TEMPLATE_INSTANTIATE_F(CacheFixture, BM_UniqueAdd, WebCacheLookupOrAdapter) ->Name("UNIQUE web-O ")->Apply(DefaultArgs); +BENCHMARK_TEMPLATE_INSTANTIATE_F(ParetoFixture, BM_Pareto, SharedLRUAdapter) ->Name("PARETO shared")->Apply(DefaultArgs); +BENCHMARK_TEMPLATE_INSTANTIATE_F(ParetoFixture, BM_Pareto, SimpleLRUAdapter) ->Name("PARETO simple")->Apply(DefaultArgs); +BENCHMARK_TEMPLATE_INSTANTIATE_F(ParetoFixture, BM_Pareto, CohortLRUAdapter) ->Name("PARETO cohort")->Apply(DefaultArgs); +BENCHMARK_TEMPLATE_INSTANTIATE_F(ParetoFixture, BM_Pareto, WebCacheAdapter) ->Name("PARETO web ")->Apply(DefaultArgs); +BENCHMARK_TEMPLATE_INSTANTIATE_F(ParetoFixture, BM_Pareto, WebCacheLookupOrAdapter) ->Name("PARETO web-O ")->Apply(DefaultArgs); + +// }}} + +} // namespace + +int main(int argc, char** argv) { + auto args = argv_to_vec(argc, argv); + auto cct = global_init( + nullptr, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_MON_CONFIG); + common_init_finish(g_ceph_context); + + char arg0_default[] = "benchmark"; + char* args_default = arg0_default; + if (argv == nullptr) { + argc = 1; + argv = &args_default; + } + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); + return 0; +}