From: Matthew N Heler Date: Fri, 27 Feb 2026 00:44:07 +0000 (-0600) Subject: rgw: add benchmark for GCM and CBC encrypt/decrypt throughput X-Git-Tag: v21.0.1~125^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7e646e0633baede3c8752c1f44921832ede8e5d3;p=ceph.git rgw: add benchmark for GCM and CBC encrypt/decrypt throughput Benchmark for measuring AES-256-CBC and AES-256-GCM throughput with hardware acceleration. Supports multi-threaded runs across various chunk sizes. Signed-off-by: Matthew N. Heler --- diff --git a/src/test/rgw/CMakeLists.txt b/src/test/rgw/CMakeLists.txt index 0e1c452f172..c64e6f22068 100644 --- a/src/test/rgw/CMakeLists.txt +++ b/src/test/rgw/CMakeLists.txt @@ -141,6 +141,19 @@ target_link_libraries(bench_rgw_ratelimit ${rgw_libs}) add_executable(bench_rgw_ratelimit_gc bench_rgw_ratelimit_gc.cc ) target_link_libraries(bench_rgw_ratelimit_gc ${rgw_libs}) +# Crypto benchmark through the BlockCrypt interface (full RGW encrypt path). +# Uses the plugin system to load the configured accelerator (ISA-L or OpenSSL). +# Run with --accel crypto_isal or --accel crypto_openssl to compare backends. +add_executable(bench_rgw_crypto_accel bench_rgw_crypto_accel.cc) +target_link_libraries(bench_rgw_crypto_accel + ${rgw_libs} + global + ${CURL_LIBRARIES} + ${EXPAT_LIBRARIES} + ${CMAKE_DL_LIBS} + ${CRYPTO_LIBS}) +add_dependencies(bench_rgw_crypto_accel crypto_plugins) + add_executable(unittest_rgw_ratelimit test_rgw_ratelimit.cc $) target_link_libraries(unittest_rgw_ratelimit ${rgw_libs}) add_ceph_unittest(unittest_rgw_ratelimit) diff --git a/src/test/rgw/bench_rgw_crypto_accel.cc b/src/test/rgw/bench_rgw_crypto_accel.cc new file mode 100644 index 00000000000..001028a9a0a --- /dev/null +++ b/src/test/rgw/bench_rgw_crypto_accel.cc @@ -0,0 +1,476 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 sts=2 expandtab + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2026 Ceph contributors + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "global/global_init.h" +#include "common/ceph_argparse.h" +#include "common/PluginRegistry.h" +#include "rgw_common.h" +#include "rgw_crypt.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __x86_64__ +#include +#endif + +std::unique_ptr AES_256_CBC_create( + const DoutPrefixProvider* dpp, CephContext* cct, + const uint8_t* key, size_t len); + +#define dout_subsys ceph_subsys_rgw + +using std::cout; +using std::cerr; +using std::string; +using std::vector; + +static bool is_supported_accel(const string& accel) { + return accel == "crypto_isal" || accel == "crypto_openssl" || + accel == "crypto_qat"; +} + +static string human_bytes(double bytes) { + const char* units[] = {"B", "KB", "MB", "GB", "TB"}; + int u = 0; + while (bytes >= 1024.0 && u < 4) { bytes /= 1024.0; ++u; } + char buf[64]; + snprintf(buf, sizeof(buf), "%.2f %s", bytes, units[u]); + return buf; +} + +static void fill_random(uint8_t* buf, size_t len) { + static std::mt19937_64 rng(42); + size_t i = 0; + for (; i + sizeof(uint64_t) <= len; i += sizeof(uint64_t)) { + uint64_t v = rng(); + memcpy(buf + i, &v, sizeof(v)); + } + for (; i < len; ++i) buf[i] = rng() & 0xff; +} + +/* Verify that a crypto instance can perform a basic encrypt operation. */ +static bool probe_encrypt(std::unique_ptr& crypt, + const string& err_msg) { + if (!crypt) { cerr << err_msg << "\n"; return false; } + bufferlist in, out; + buffer::ptr buf(4096); + fill_random(reinterpret_cast(buf.c_str()), 4096); + in.append(buf); + if (!crypt->encrypt(in, 0, 4096, out, 0, null_yield)) { + cerr << err_msg << "\n"; + return false; + } + return true; +} + +/* Throughput display unit, set once from --unit CLI option. */ +static const char* tp_unit = "MB/s"; +static double tp_divisor = 1024.0 * 1024.0; + +struct BenchResult { + string label; + size_t data_size; + int iterations; + double elapsed_sec; + + double throughput() const { + if (elapsed_sec <= 0) return 0; + return static_cast(data_size) * iterations / elapsed_sec / tp_divisor; + } + double ops_per_sec() const { + if (elapsed_sec <= 0) return 0; + return iterations / elapsed_sec; + } +}; + +using CryptFactory = std::function()>; +using BenchFn = std::function; + +static int probe_iters(double target_time, double min_sec, + int warmup_iters, BlockCrypt& crypt, BenchFn& fn) { + for (int i = 0; i < warmup_iters; ++i) { + if (!fn(crypt)) return 0; + } + int probe_n = 10; + double probe_sec = 0; + do { + auto t0 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < probe_n; ++i) { + if (!fn(crypt)) return 0; + } + auto t1 = std::chrono::high_resolution_clock::now(); + probe_sec = std::chrono::duration(t1 - t0).count(); + if (probe_sec < min_sec) probe_n *= 2; + } while (probe_sec < min_sec); + return std::max(static_cast(target_time / (probe_sec / probe_n)), 100); +} + +static void spin_barrier(std::atomic& barrier, int target) { + barrier.fetch_add(1, std::memory_order_release); + while (barrier.load(std::memory_order_acquire) < target) { +#ifdef __x86_64__ + _mm_pause(); +#else + std::this_thread::yield(); +#endif + } +} + +static BenchResult run_bench(const string& label, size_t data_size, + int warmup_iters, double target_time, + int fixed_iters, int nthreads, + CryptFactory factory, BenchFn bench_fn) { + if (nthreads <= 1) { + auto crypt = factory(); + if (!crypt) { + cerr << "FAILED: " << label << "\n"; + return {label, data_size, 0, 0.0}; + } + int iters = fixed_iters; + if (iters <= 0) + iters = probe_iters(target_time, 0.1, warmup_iters, *crypt, bench_fn); + if (iters == 0) return {label, data_size, 0, 0.0}; + auto start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < iters; ++i) { + if (!bench_fn(*crypt)) { + cerr << "FAILED at iteration " << i << ": " << label << "\n"; + return {label, data_size, i, 0.0}; + } + } + auto end = std::chrono::high_resolution_clock::now(); + return {label, data_size, iters, + std::chrono::duration(end - start).count()}; + } + + int iters = fixed_iters; + if (iters <= 0) { + const double probe_dur = 0.25; + std::vector per_thread_count(nthreads); + std::atomic barrier{0}; + std::vector threads; + std::atomic probe_failed{false}; + for (int t = 0; t < nthreads; ++t) { + threads.emplace_back([&, t]() { + auto crypt = factory(); + if (!crypt) { probe_failed.store(true); } + if (!probe_failed.load()) { + for (int i = 0; i < warmup_iters; ++i) bench_fn(*crypt); + } + spin_barrier(barrier, nthreads); + if (probe_failed.load()) return; + int count = 0; + auto t0 = std::chrono::high_resolution_clock::now(); + double elapsed = 0; + do { + bench_fn(*crypt); + ++count; + elapsed = std::chrono::duration( + std::chrono::high_resolution_clock::now() - t0).count(); + } while (elapsed < probe_dur); + per_thread_count[t] = count; + }); + } + for (auto& t : threads) t.join(); + int slowest = *std::min_element(per_thread_count.begin(), + per_thread_count.end()); + iters = std::max(static_cast(slowest * (target_time / probe_dur)), + 100); + } + + std::vector thread_elapsed(nthreads); + std::atomic barrier{0}; + std::atomic failed{false}; + + auto thread_body = [&](int tid) { + auto crypt = factory(); + if (!crypt) { failed.store(true); } + if (!failed.load()) { + for (int i = 0; i < warmup_iters; ++i) { + if (!bench_fn(*crypt)) { failed.store(true); break; } + } + } + // always reach the barrier so other threads don't spin forever + spin_barrier(barrier, nthreads); + if (failed.load()) return; + auto start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < iters; ++i) { + if (!bench_fn(*crypt)) { failed.store(true); return; } + } + auto end = std::chrono::high_resolution_clock::now(); + thread_elapsed[tid] = std::chrono::duration(end - start).count(); + }; + + std::vector threads; + for (int t = 0; t < nthreads; ++t) + threads.emplace_back(thread_body, t); + for (auto& t : threads) t.join(); + + if (failed.load()) { + cerr << "FAILED: " << label << "\n"; + return {label, data_size, 0, 0.0}; + } + + return {label, data_size, iters * nthreads, + *std::max_element(thread_elapsed.begin(), thread_elapsed.end())}; +} + +static void print_header() { + cout << std::left + << std::setw(36) << "Benchmark" + << std::setw(12) << "DataSize" + << std::setw(10) << "Iters" + << std::setw(12) << "Time(s)" + << std::setw(14) << "Throughput" + << std::setw(12) << "Ops/s" + << "\n" << string(96, '-') << "\n"; +} + +static void print_result(const BenchResult& r) { + cout << std::left + << std::setw(36) << r.label + << std::setw(12) << human_bytes(r.data_size) + << std::setw(10) << r.iterations + << std::setw(12) << std::fixed << std::setprecision(4) << r.elapsed_sec + << std::setw(14) << (std::to_string(static_cast(r.throughput())) + " " + tp_unit) + << std::setw(12) << std::fixed << std::setprecision(1) << r.ops_per_sec() + << "\n"; +} + +int main(int argc, char** argv) { + auto args = argv_to_vec(argc, argv); + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + + int warmup = 10; + int iterations = 0; + size_t fixed_size = 0; + double target_time = 2.0; + int num_threads = 1; + string accel; + // parse args (not argv) — global_init() already stripped Ceph options + for (size_t i = 0; i < args.size(); ++i) { + string arg(args[i]); + if ((arg == "--size" || arg == "-s") && i + 1 < args.size()) { + fixed_size = std::stoull(args[++i]); + } else if (arg == "--iterations" && i + 1 < args.size()) { + iterations = std::stoi(args[++i]); + } else if ((arg == "--warmup" || arg == "-w") && i + 1 < args.size()) { + warmup = std::stoi(args[++i]); + } else if ((arg == "--time" || arg == "-t") && i + 1 < args.size()) { + target_time = std::stod(args[++i]); + } else if (arg == "--accel" && i + 1 < args.size()) { + accel = args[++i]; + } else if ((arg == "--threads" || arg == "-j") && i + 1 < args.size()) { + num_threads = std::stoi(args[++i]); + } else if (arg == "--unit" && i + 1 < args.size()) { + string u = args[++i]; + if (u == "KB") { tp_unit = "KB/s"; tp_divisor = 1024.0; } + else if (u == "MB") { tp_unit = "MB/s"; tp_divisor = 1024.0 * 1024.0; } + else if (u == "GB") { tp_unit = "GB/s"; tp_divisor = 1024.0 * 1024.0 * 1024.0; } + else if (u == "TB") { tp_unit = "TB/s"; tp_divisor = 1024.0 * 1024.0 * 1024.0 * 1024.0; } + else { + cerr << "Invalid --unit '" << u << "'. Supported: KB, MB, GB, TB\n"; + return 2; + } + } else if (arg == "--help" || arg == "-h") { + cout << "Usage: " << argv[0] << " [options]\n" + << " --accel Crypto plugin (crypto_isal, crypto_openssl, crypto_qat)\n" + << " --size, -s Fixed data size (default: run multiple sizes)\n" + << " --iterations Fixed iteration count (default: auto-scale)\n" + << " --warmup, -w Warmup iterations (default: 10)\n" + << " --time, -t Target time for auto-scaling (default: 2.0)\n" + << " --threads, -j Number of threads (default: 1)\n" + << " --unit Throughput unit: KB, MB, GB, TB (default: MB)\n" + << " --help, -h Show this help\n"; + return 0; + } + } + + if (!accel.empty()) { + if (!is_supported_accel(accel)) { + cerr << "Invalid --accel '" << accel << "'\n"; + return 2; + } + int r = g_ceph_context->_conf.set_val( + "plugin_crypto_accelerator", accel); + if (r < 0) { + cerr << "Failed to set plugin_crypto_accelerator\n"; + return 2; + } + } + + g_ceph_context->_conf.apply_changes(nullptr); + common_init_finish(g_ceph_context); + + string active_accel = g_ceph_context->_conf->plugin_crypto_accelerator; + if (!is_supported_accel(active_accel)) { + cerr << "Unsupported plugin_crypto_accelerator='" << active_accel << "'\n"; + return 2; + } + + { + auto* reg = g_ceph_context->get_plugin_registry(); + if (!reg->get_with_load("crypto", active_accel)) { + cerr << "Failed to load crypto plugin '" << active_accel << "'\n"; + return 1; + } + } + + static constexpr size_t AES_256_KEYSIZE = 32; + uint8_t key[AES_256_KEYSIZE]; + fill_random(key, sizeof(key)); + const NoDoutPrefix no_dpp(g_ceph_context, dout_subsys); + + { + auto cbc = AES_256_CBC_create(&no_dpp, g_ceph_context, key, AES_256_KEYSIZE); + if (!probe_encrypt(cbc, "CBC probe failed for '" + active_accel + + "'. Plugin loaded but hardware may not be available.")) + return 1; + } + + const bool have_gcm = (active_accel != "crypto_qat"); + if (have_gcm) { + auto gcm = AES_256_GCM_create(&no_dpp, g_ceph_context, key, AES_256_KEYSIZE); + if (!probe_encrypt(gcm, "GCM probe failed for '" + active_accel + "'.")) + return 1; + } + + vector sizes; + if (fixed_size > 0) { + sizes.push_back(fixed_size); + } else { + sizes = {4096, 65536, 262144, 1048576, 4194304, 16777216}; + } + + cout << "\n=== RGW BlockCrypt Benchmark ===\n\n" + << "Accelerator: " << active_accel << "\n" + << "Threads: " << num_threads << "\n" + << "Modes: AES-256-CBC"; + if (have_gcm) cout << ", AES-256-GCM"; + else cout << " (GCM skipped)"; + cout << "\nWarmup: " << warmup << " iterations\n"; + if (iterations > 0) + cout << "Iters: " << iterations << " (fixed, per thread)\n"; + else + cout << "Target: ~" << std::fixed << std::setprecision(1) + << target_time << "s per benchmark\n"; + cout << "\n"; + + vector results; + string suffix = (num_threads > 1) + ? " [" + active_accel + " x" + std::to_string(num_threads) + "]" + : " [" + active_accel + "]"; + auto emit = [&](BenchResult r) { + print_result(r); + cout << std::flush; + results.push_back(std::move(r)); + }; + auto dec_fn = [](string data, size_t len) -> BenchFn { + return [data, len](BlockCrypt& c) -> bool { + bufferlist in, out; + in.append(data); + return c.decrypt(in, 0, len, out, 0, null_yield); + }; + }; + print_header(); + + for (size_t sz : sizes) { + buffer::ptr plain_buf(sz); + fill_random(reinterpret_cast(plain_buf.c_str()), sz); + bufferlist input; + input.append(plain_buf); + string input_str = input.to_str(); + + auto cbc_factory = [&]() { + return AES_256_CBC_create(&no_dpp, g_ceph_context, key, AES_256_KEYSIZE); + }; + auto enc_fn = [input_str, sz](BlockCrypt& c) -> bool { + bufferlist in, out; + in.append(input_str); + return c.encrypt(in, 0, sz, out, 0, null_yield); + }; + + auto bench = [&](const string& lbl, size_t sz, CryptFactory f, BenchFn fn) { + emit(run_bench(lbl, sz, warmup, target_time, iterations, num_threads, + std::move(f), std::move(fn))); + }; + + bench("CBC-Encrypt" + suffix, sz, cbc_factory, enc_fn); + + { + auto pre = cbc_factory(); + bufferlist ct; + pre->encrypt(input, 0, sz, ct, 0, null_yield); + bench("CBC-Decrypt" + suffix, sz, cbc_factory, dec_fn(ct.to_str(), sz)); + } + + if (have_gcm) { + auto gcm_factory = [&]() { + return AES_256_GCM_create(&no_dpp, g_ceph_context, key, AES_256_KEYSIZE); + }; + + bench("GCM-Encrypt" + suffix, sz, gcm_factory, enc_fn); + + { + auto pre = gcm_factory(); + bufferlist ct; + pre->encrypt(input, 0, sz, ct, 0, null_yield); + string salt = AES_256_GCM_get_salt(pre.get()); + size_t enc_sz = ct.length(); + auto dec_factory = [&, salt]() { + return AES_256_GCM_create(&no_dpp, g_ceph_context, key, AES_256_KEYSIZE, + reinterpret_cast(salt.data()), salt.size()); + }; + bench("GCM-Decrypt" + suffix, sz, dec_factory, dec_fn(ct.to_str(), enc_sz)); + } + } + + cout << "\n"; + } + + cout << "\n=== Summary (Throughput in " << tp_unit << ") ===\n\n" + << std::left << std::setw(14) << "DataSize" + << std::setw(16) << "CBC-Encrypt" + << std::setw(16) << "CBC-Decrypt"; + if (have_gcm) + cout << std::setw(16) << "GCM-Encrypt" + << std::setw(16) << "GCM-Decrypt"; + cout << "\n" << string(46 + (have_gcm ? 32 : 0), '-') << "\n"; + + for (size_t sz : sizes) { + cout << std::setw(14) << human_bytes(sz); + for (const auto& r : results) { + if (r.data_size == sz) + cout << std::setw(16) << std::fixed << std::setprecision(0) + << r.throughput(); + } + cout << "\n"; + } + + cout << "\n"; + return 0; +}