From: Marcel Lauhoff Date: Thu, 19 Dec 2024 14:41:30 +0000 (+0100) Subject: rgw: SSE-KMS Secrets Cache X-Git-Tag: v21.0.1~14^2~10 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b6186fc596dfab6442fb55f179ca72e8bc0fde60;p=ceph.git rgw: SSE-KMS Secrets Cache Add SSE Key Management System secrets cache to RGW. It is common to have secrets shared by many if not all objects in a bucket. Without RGW-side caching every PUT/GET will cause a request to an external KSM. This not only adds load to the KSM, but also slows down read and writes. Combine WebCache, ceph::async::call_once and LinuxKeyringSecret into KMSCache. WebCache stores async::once_result to wrap results of a KMS secret fetch to mitigate cache stampedes (concurrent cache requests to the same key coalesce into one). The retrieved secrets are stored in the Linux kernel key retention service (LinuxKeyringSecret) for safe keeping and retrial by subsequent requests. KMSCache adds a TTL reaper and life cycle. Cache values and error handling: The cache stores positive fetch results, permanent errors (e.g key does not exists) and transient errors (e.g fetch timeout). Each with a different TTL. Unit tests to cover cached / uncached KMS retrieve and runtime cache disable via config. Add perf counter `kms_fetch_lat` to track KMS fetch request latency and error counters to track permanent, transient and key store errors. Signed-off-by: Marcel Lauhoff Fixes: https://tracker.ceph.com/issues/68524 On-behalf-of: SAP marcel.lauhoff@sap.com --- diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index dfe1b8a76d3..9c96127a804 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -3106,12 +3106,80 @@ options: with_legacy: true # extra keys that may be used for aws:kms # defined as map "key1=YmluCmJvb3N0CmJvb3N0LQ== key2=b3V0CnNyYwpUZXN0aW5nCg==" +- name: rgw_crypt_s3_kms_cache_enabled + type: bool + level: advanced + desc: Enable caching of encryption keys for SSE-KMS. + default: true + with_legacy: true + services: + - rgw + see_also: + - rgw_crypt_s3_kms_cache_max_size + - rgw_crypt_s3_kms_cache_positive_ttl + - rgw_crypt_s3_kms_cache_transient_error_ttl + - rgw_crypt_s3_kms_cache_negative_ttl +- name: rgw_crypt_s3_kms_cache_max_size + type: uint + level: advanced + desc: Maximum number of SSE-KMS secrets cached. + Each key consumes 32 byte (AES-256) + overhead in memory + default: 128 + with_legacy: true + services: + - rgw + see_also: + - rgw_crypt_s3_kms_cache_enabled +- name: rgw_crypt_s3_kms_cache_positive_ttl + type: uint + level: advanced + desc: Time in seconds after which the KMS cache evicts successful lookups. + services: + - rgw + with_legacy: true + default: 60 + min: 10 + max: 3600 + see_also: + - rgw_crypt_s3_kms_cache_enabled - name: rgw_crypt_s3_kms_encryption_keys type: str level: dev services: - rgw with_legacy: true +- name: rgw_crypt_s3_kms_cache_transient_error_ttl + type: uint + level: advanced + desc: Time in seconds after which the KMS cache evicts entries representing transient errors (timeouts, temporary outages, misconfiguration, etc). + services: + - rgw + with_legacy: true + default: 10 + min: 0 + max: 3600 + see_also: + - rgw_crypt_s3_kms_cache_enabled +- name: rgw_crypt_s3_kms_cache_negative_ttl + type: uint + level: advanced + desc: Time in seconds after which the KMS cache evicts permanent look-up errors (e.g key does not exist). + services: + - rgw + with_legacy: true + default: 120 + min: 0 + max: 3600 + see_also: + - rgw_crypt_s3_kms_cache_enabled +- name: rgw_crypt_s3_kms_testing_delay + type: uint + level: dev + desc: Add delay in milliseconds to the 'testing' KMS key retrieval. + services: + - rgw + with_legacy: true + default: 0 - name: rgw_crypt_vault_auth type: str level: advanced diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index 3d9db673db4..b7069132316 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -131,6 +131,7 @@ set(librgw_common_srcs rgw_rest_iam.cc rgw_object_lock.cc rgw_kms.cc + rgw_kms_cache.cc rgw_kmip_client.cc rgw_url.cc rgw_oidc_provider.cc diff --git a/src/rgw/rgw_appmain.cc b/src/rgw/rgw_appmain.cc index 16a79f21d52..9c90863419a 100644 --- a/src/rgw/rgw_appmain.cc +++ b/src/rgw/rgw_appmain.cc @@ -26,6 +26,7 @@ #include "include/compat.h" #include "include/str_list.h" #include "include/stringify.h" +#include "rgw_kms_cache.h" #include "rgw_main.h" #include "rgw_asio_thread.h" #include "rgw_common.h" @@ -463,6 +464,12 @@ int rgw::AppMain::init_frontends2(RGWLib* rgwlib) } else if (framework == "beast") { fe = new RGWAsioFrontend(env, config, *sched_ctx, context_pool_holder.get()); + if (g_conf()->rgw_crypt_s3_kms_cache_enabled) { + env.kms_cache->initialize_ttl_reaper( + g_conf()->rgw_beast_enable_async + ? std::optional(context_pool_holder.get().get_executor()) + : nullopt); + } } else if (framework == "rgw-nfs") { fe = new RGWLibFrontend(env, config); @@ -598,6 +605,15 @@ void rgw::AppMain::init_dedup() } #endif +void rgw::AppMain::init_kms_cache() +{ + if (!g_conf().get_val("rgw_crypt_s3_kms_cache_enabled")) { + return; + } + env.kms_cache = std::make_unique( + dpp->get_cct(), Keyring::get_best()); +} + void rgw::AppMain::shutdown(std::function finalize_async_signals) { // stop the realm reloader @@ -618,6 +634,8 @@ void rgw::AppMain::shutdown(std::function finalize_async_signals) } #endif + env.kms_cache.reset(); + for (auto& fe : fes) { fe->stop(); } diff --git a/src/rgw/rgw_crypt.cc b/src/rgw/rgw_crypt.cc index f671e3ddd88..e572779af3a 100644 --- a/src/rgw/rgw_crypt.cc +++ b/src/rgw/rgw_crypt.cc @@ -20,6 +20,7 @@ #include "crypto/crypto_plugin.h" #include "rgw/rgw_kms.h" #include "rgw_range_projection.h" +#include "rgw/rgw_process_env.h" #include "rapidjson/document.h" #include "rapidjson/writer.h" #include "rapidjson/error/error.h" @@ -2195,7 +2196,7 @@ int rgw_s3_prepare_encrypt(req_state* s, optional_yield y, set_attr(attrs, RGW_ATTR_CRYPT_KEYID, key_id); set_attr(attrs, RGW_ATTR_CRYPT_CONTEXT, cooked_context); std::string actual_key; - res = make_actual_key_from_kms(s, attrs, y, actual_key); + res = make_actual_key_from_kms(s, attrs, s->penv.kms_cache.get(), y, actual_key); if (res != 0) { ldpp_dout(s, 5) << "ERROR: failed to retrieve actual key from key_id: " << key_id << dendl; s->err.message = "Failed to retrieve the actual key, kms-keyid: " + std::string(key_id); @@ -2694,7 +2695,8 @@ int rgw_s3_prepare_decrypt(req_state* s, optional_yield y, /* try to retrieve actual key */ std::string key_id = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYID); std::string actual_key; - res = reconstitute_actual_key_from_kms(s, attrs, y, actual_key); + + res = reconstitute_actual_key_from_kms(s, attrs, s->penv.kms_cache.get(), y, actual_key); if (res != 0) { ldpp_dout(s, 10) << "ERROR: failed to retrieve actual key from key_id: " << key_id << dendl; s->err.message = "Failed to retrieve the actual key, kms-keyid: " + key_id; @@ -2729,7 +2731,7 @@ int rgw_s3_prepare_decrypt(req_state* s, optional_yield y, /* try to retrieve actual key */ std::string key_id = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYID); std::string actual_key; - res = reconstitute_actual_key_from_kms(s, attrs, y, actual_key); + res = reconstitute_actual_key_from_kms(s, attrs, s->penv.kms_cache.get(), y, actual_key); if (res != 0) { ldpp_dout(s, 10) << "ERROR: failed to retrieve actual key from key_id: " << key_id << dendl; s->err.message = "Failed to retrieve the actual key, kms-keyid: " + key_id; diff --git a/src/rgw/rgw_kms.cc b/src/rgw/rgw_kms.cc index e089b4ec25f..a691290c054 100644 --- a/src/rgw/rgw_kms.cc +++ b/src/rgw/rgw_kms.cc @@ -13,6 +13,9 @@ #include "rgw/rgw_b64.h" #include "rgw/rgw_kms.h" #include "rgw/rgw_kmip_client.h" +#include "rgw/rgw_perf_counters.h" +#include "rgw_kms_cache.h" +#include "rgw_string.h" #include #include #include @@ -1180,41 +1183,70 @@ public: }; }; -int reconstitute_actual_key_from_kms(const DoutPrefixProvider *dpp, - map& attrs, - optional_yield y, - std::string& actual_key) -{ +static int maybe_cache_kms_fetch( + const DoutPrefixProvider* dpp, const std::string& cache_prefix, + const std::string& key_id, rgw::kms::KMSCache* kms_cache, + const kms::KMSCache::FetchFn& fetch, std::string& actual_key, + optional_yield y) { + if (kms_cache == nullptr || + !dpp->get_cct()->_conf->rgw_crypt_s3_kms_cache_enabled) { + const auto ret = fetch(actual_key); + if (ret == -ENOENT) { + perfcounter->inc(l_rgw_kms_error_permanent); + } else if (ret < 0) { + perfcounter->inc(l_rgw_kms_error_transient); + } + return ret; + } + return kms_cache->do_cache(dpp, cache_prefix, key_id, fetch, actual_key, y); +} + +int reconstitute_actual_key_from_kms( + const DoutPrefixProvider* dpp, map& attrs, + rgw::kms::KMSCache* kms_cache, optional_yield y, std::string& actual_key) { std::string key_id = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYID); - KMSContext kctx { dpp->get_cct() }; - const std::string &kms_backend { kctx.backend() }; + KMSContext kctx{dpp->get_cct()}; + const std::string& kms_backend{kctx.backend()}; - ldpp_dout(dpp, 20) << "Getting KMS encryption key for key " << key_id << dendl; + ldpp_dout(dpp, 20) << "Getting KMS encryption key for key " << key_id + << dendl; ldpp_dout(dpp, 20) << "SSE-KMS backend is " << kms_backend << dendl; - if (RGW_SSE_KMS_BACKEND_BARBICAN == kms_backend) { - return get_actual_key_from_barbican(dpp, key_id, y, actual_key); - } - - if (RGW_SSE_KMS_BACKEND_VAULT == kms_backend) { - return reconstitute_actual_key_from_vault(dpp, kctx, attrs, y, actual_key); - } + const auto fetch = [&](std::string& out_secret) -> int { + PerfGuard perf(perfcounter, l_rgw_kms_fetch_lat); + if (RGW_SSE_KMS_BACKEND_BARBICAN == kms_backend) { + return get_actual_key_from_barbican(dpp, key_id, y, out_secret); + } - if (RGW_SSE_KMS_BACKEND_KMIP == kms_backend) { - return get_actual_key_from_kmip(dpp, key_id, y, actual_key); - } + if (RGW_SSE_KMS_BACKEND_VAULT == kms_backend) { + return reconstitute_actual_key_from_vault( + dpp, kctx, attrs, y, out_secret); + } - if (RGW_SSE_KMS_BACKEND_TESTING == kms_backend) { - std::string key_selector = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYSEL); - return get_actual_key_from_conf(dpp, key_id, key_selector, actual_key); - } + if (RGW_SSE_KMS_BACKEND_KMIP == kms_backend) { + return get_actual_key_from_kmip(dpp, key_id, y, out_secret); + } - ldpp_dout(dpp, 0) << "ERROR: Invalid rgw_crypt_s3_kms_backend: " << kms_backend << dendl; - return -EINVAL; + if (RGW_SSE_KMS_BACKEND_TESTING == kms_backend) { + std::string key_selector = + get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYSEL); + std::this_thread::sleep_for( + std::chrono::milliseconds( + dpp->get_cct()->_conf->rgw_crypt_s3_kms_testing_delay)); + return get_actual_key_from_conf(dpp, key_id, key_selector, out_secret); + } + ldpp_dout(dpp, 0) << "ERROR: Invalid rgw_crypt_s3_kms_backend: " + << kms_backend << dendl; + return -EINVAL; + }; + const std::string cache_prefix = string_cat_reserve("kms_", kms_backend); + return maybe_cache_kms_fetch( + dpp, cache_prefix, key_id, kms_cache, fetch, actual_key, y); } int make_actual_key_from_kms(const DoutPrefixProvider *dpp, map& attrs, + rgw::kms::KMSCache* kms_cache, optional_yield y, std::string& actual_key) { @@ -1222,7 +1254,7 @@ int make_actual_key_from_kms(const DoutPrefixProvider *dpp, const std::string &kms_backend { kctx.backend() }; if (RGW_SSE_KMS_BACKEND_VAULT == kms_backend) return make_actual_key_from_vault(dpp, kctx, attrs, y, actual_key); - return reconstitute_actual_key_from_kms(dpp, attrs, y, actual_key); + return reconstitute_actual_key_from_kms(dpp, attrs, kms_cache, y, actual_key); } int reconstitute_actual_key_from_sse_s3(const DoutPrefixProvider *dpp, diff --git a/src/rgw/rgw_kms.h b/src/rgw/rgw_kms.h index b0c29faa5eb..a2eccea19ee 100644 --- a/src/rgw/rgw_kms.h +++ b/src/rgw/rgw_kms.h @@ -22,6 +22,10 @@ static const std::string RGW_SSE_KMS_VAULT_SE_KV = "kv"; static const std::string RGW_SSE_KMS_KMIP_SE_KV = "kv"; +namespace rgw::kms { + class KMSCache; +} + /** * Retrieves the actual server-side encryption key from a KMS system given a * key ID. Currently supported KMS systems are OpenStack Barbican and HashiCorp @@ -34,10 +38,12 @@ static const std::string RGW_SSE_KMS_KMIP_SE_KV = "kv"; */ int make_actual_key_from_kms(const DoutPrefixProvider *dpp, std::map& attrs, + rgw::kms::KMSCache* kms_cache, optional_yield y, std::string& actual_key); int reconstitute_actual_key_from_kms(const DoutPrefixProvider *dpp, std::map& attrs, + rgw::kms::KMSCache* kms_cache, optional_yield y, std::string& actual_key); int make_actual_key_from_sse_s3(const DoutPrefixProvider *dpp, diff --git a/src/rgw/rgw_kms_cache.cc b/src/rgw/rgw_kms_cache.cc new file mode 100644 index 00000000000..76d5aef89b8 --- /dev/null +++ b/src/rgw/rgw_kms_cache.cc @@ -0,0 +1,256 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2025 Clyso GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "rgw_kms_cache.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/async/yield_context.h" +#include "common/dout.h" +#include "common/dout_fmt.h" +#include "common/keyring.h" +#include "include/ceph_assert.h" +#include "include/function2.hpp" +#include "rgw_perf_counters.h" +#include "rgw_string.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +namespace rgw::kms { + +std::jthread KMSCache::make_ttl_reaper_thread( + CephContext* cct, KMSSecretCache& cache, std::chrono::seconds ttl) { + return std::jthread([cct, &cache, ttl](std::stop_token stop) { + const std::string thread_name = fmt::format("{}-ttl-reaper", cache.name()); + ceph_pthread_setname(thread_name.c_str()); + std::mutex mutex; + std::condition_variable cond; + std::stop_callback on_stop(stop, [&cond]() { cond.notify_all(); }); + ldout(cct, 10) << "KMS Cache: Starting TTL reaper thread " << thread_name + << "/" << std::hex << std::this_thread::get_id() << std::dec + << ", running every " << ttl << dendl; + while (!stop.stop_requested()) { + std::unique_lock lock(mutex); + if (cond.wait_for(lock, ttl, [&stop] { return stop.stop_requested(); })) { + break; + } + const auto expired_count = cache.expire_erase(); + ldout(cct, 20) << "KMS Cache: TTL reaper thread expired " << expired_count + << " entries" << dendl; + } + ldout(cct, 10) << "KMS Cache: Stopping TTL reaper thread " << thread_name + << "/" << std::hex << std::this_thread::get_id() << std::dec + << dendl; + }); +} + +std::future KMSCache::make_ttl_reaper_async( + CephContext* cct, KMSSecretCache& cache, std::chrono::seconds ttl, + const boost::asio::strand& strand, + boost::asio::cancellation_signal& cancel_signal) { + return boost::asio::spawn( + strand, + [cct, &cache, ttl](const boost::asio::yield_context& yield) { + ldout(cct, 10) << "KMS Cache: Starting async TTL reaper, running every " + << ttl << dendl; + boost::asio::steady_timer timer(yield.get_executor()); + while (true) { + timer.expires_after(ttl); + boost::system::error_code ec; + timer.async_wait(yield[ec]); + if (ec == boost::asio::error::operation_aborted) { + ldout(cct, 10) << "KMS Cache: Stopping async TTL reaper" << dendl; + break; + } + const auto expired_count = cache.expire_erase(); + ldout(cct, 20) << "KMS Cache: Async TTL reaper expired " + << expired_count << " entries" << dendl; + } + }, + boost::asio::bind_cancellation_slot( + cancel_signal.slot(), + boost::asio::bind_executor(strand, boost::asio::use_future))); +} + +KMSCache::KMSCache(CephContext* _cct, std::unique_ptr _keyring) + : cct(_cct), keyring(std::move(_keyring)) { + ldout(cct, 10) << fmt::format( + "KMS Cache: Initializing size:{} TTL pos:{} " + "neg:{} err:{}", + cct->_conf->rgw_crypt_s3_kms_cache_max_size, + cct->_conf->rgw_crypt_s3_kms_cache_positive_ttl, + cct->_conf->rgw_crypt_s3_kms_cache_negative_ttl, + cct->_conf->rgw_crypt_s3_kms_cache_transient_error_ttl) + << dendl; + cache = std::make_unique( + cct, "kms-cache", cct->_conf->rgw_crypt_s3_kms_cache_max_size, + std::chrono::seconds(cct->_conf->rgw_crypt_s3_kms_cache_positive_ttl)); + + std::error_code ec; + if (!keyring->supported(&ec)) { + ldout(cct, 1) << "KMS Cache: " << keyring->name() << " unsupported (error " + << ec << "). Disabling Cache." << dendl; + cct->_conf->rgw_crypt_s3_kms_cache_enabled = false; + } +} + +KMSCache::~KMSCache() { + stop_ttl_reaper(); +} + +void KMSCache::initialize_ttl_reaper( + std::optional executor) { + ceph_assert(cache); + if (reaper_initialized()) { + return; + } + const auto min_ttl_secs = std::min( + {cct->_conf->rgw_crypt_s3_kms_cache_positive_ttl, + cct->_conf->rgw_crypt_s3_kms_cache_negative_ttl, + cct->_conf->rgw_crypt_s3_kms_cache_transient_error_ttl}); + if (executor.has_value()) { + auto& state = reaper_state.emplace(executor.value()); + state.done = make_ttl_reaper_async( + cct, *cache, std::chrono::seconds(min_ttl_secs), + state.strand, state.cancel_signal); + } else { + reaper_state.emplace(make_ttl_reaper_thread( + cct, *cache, std::chrono::seconds(min_ttl_secs))); + } +} + +void KMSCache::stop_ttl_reaper() { + ceph_assert(cache); + std::visit( + fu2::overload( + [](const std::monostate& mono) {}, + [](AsyncState& async_state) { + boost::asio::dispatch( + async_state.strand, + [&signal = async_state.cancel_signal]() { + signal.emit(boost::asio::cancellation_type::terminal); + }); + try { + async_state.done.wait(); + } catch (const std::future_error& e) { + if (e.code() != std::future_errc::no_state) throw; + } + }, + [&](const std::jthread&) { reaper_state.emplace(); }), + reaper_state); +} + +void KMSCache::clear_cache() const { + cache->clear(); +} + +int KMSCache::do_cache( + const DoutPrefixProvider* dpp, const std::string& key_prefix_kms, + const std::string& key_id, const FetchFn& fetch, std::string& actual_key, + optional_yield y) { + static std::string_view key_prefix("rgw_sse_"); + const std::string cache_key = + string_cat_reserve(key_prefix, key_prefix_kms, "_", key_id); + std::shared_ptr value = + cache->lookup_or(key_id, std::make_shared()); + auto result = call_once( + *value, y, + [&dpp, &fetch, &cache_key, &value, &key_prefix_kms, &key_id, + this]() -> KMSCache::CacheResult { + std::string secret; + const int ret = fetch(secret); + ldpp_dout(dpp, 20) << "KMS Cache: " << cache_key + << " call_once fetched with ret " << ret << dendl; + + if (ret == -ENOENT) { // key does not exists, treat as permanent error + ldpp_dout(dpp, 15) + << "KMS Cache: " << cache_key + << " key does not exists. treating as permanent error " << dendl; + cache->update_ttl_if( + cache_key, value, + std::chrono::seconds( + dpp->get_cct()->_conf->rgw_crypt_s3_kms_cache_negative_ttl)); + perfcounter->inc(l_rgw_kms_error_permanent); + return tl::unexpected(ret); + } else if (ret < 0) { // treat other errors as transient + ldpp_dout(dpp, 15) + << "KMS Cache: " << cache_key << " fetch error (" << ret << ") " + << std::strerror(ret) << " treating as transient error " << dendl; + cache->update_ttl_if( + cache_key, value, + std::chrono::seconds( + dpp->get_cct() + ->_conf->rgw_crypt_s3_kms_cache_transient_error_ttl)); + perfcounter->inc(l_rgw_kms_error_transient); + return tl::unexpected(ret); + } + + // This function might be in flight for the same key_id more than + // once. The keyring key must, however, be unique to not refer + // (and remove) the same key twice. + uuid_d uuid; + uuid.generate_random(); + const std::string keyring_key = string_cat_reserve( + key_prefix, key_prefix_kms, "_", key_id, "_v", uuid.to_string()); + auto keyring_secret = keyring->add(keyring_key, secret); + ceph::crypto::zeroize_for_security(secret.data(), secret.length()); + if (!keyring_secret) { + ldpp_dout(dpp, 5) + << "KMS Cache: " << cache_key << " keyring add error (" + << keyring_secret.error() + << "). removing from cache. disabling cache." << dendl; + cache->remove_if(cache_key, value); + disable_cache(); + perfcounter->inc(l_rgw_kms_error_secret_store); + return tl::unexpected(-ERR_INTERNAL_ERROR); + } + return std::move(keyring_secret.value()); + }); + + ldpp_dout_fmt( + dpp, 20, "KMS Cache: {} -> {}/{}", cache_key, + result && result.has_value() ? fmt::format("{}", *result.value()) : "-", + !result ? result.error() : 0); + + if (result) { + if (auto ret = result.value()->read(actual_key); ret.value() != 0) { + ldpp_dout(dpp, 5) << "KMS Cache: " << cache_key << " keyring " + << *result.value() << " read error (" << ret + << "). removing from cache. disabling cache." << dendl; + cache->remove_if(cache_key, value); + disable_cache(); + perfcounter->inc(l_rgw_kms_error_secret_store); + return -ERR_INTERNAL_ERROR; + } + return 0; + } else { + return result.error(); + } +} + +} // namespace rgw::kms diff --git a/src/rgw/rgw_kms_cache.h b/src/rgw/rgw_kms_cache.h new file mode 100644 index 00000000000..9fce8c80a41 --- /dev/null +++ b/src/rgw/rgw_kms_cache.h @@ -0,0 +1,111 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2025 Clyso GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common/async/call_once.h" +#include "common/async/yield_context.h" +#include "common/dout.h" +#include "common/keyring.h" +#include "common/web_cache.h" +#include "include/expected.hpp" + +namespace rgw::kms { + +// Cache RGW KMS Secrets +// +// Manages a WebCache instance plus TTL reaper. Adds cache stampede +// mitigation when fetching new values using (async) call once. +class KMSCache { + CephContext* cct; + + public: + using SharedSecret = std::shared_ptr; + using CacheResult = tl::expected; + using CacheValue = ceph::async::once_result; + using KMSSecretCache = webcache::WebCache; + using FetchFn = std::function; + + protected: + std::unique_ptr cache; + std::unique_ptr keyring; + + // The TTL Reaper is either a service thread we own or async running + // on an executor elsewhere (where we keep a strand and cancellation + // signal). + struct AsyncState { + boost::asio::strand strand; + boost::asio::cancellation_signal cancel_signal; + std::future done; + explicit AsyncState(boost::asio::io_context::executor_type ex) + : strand(boost::asio::make_strand(ex)) {} + }; + std::variant reaper_state; + + public: + KMSCache() = delete; + KMSCache(const KMSCache&) = delete; + KMSCache(KMSCache&&) = delete; + KMSCache& operator=(const KMSCache&) = delete; + KMSCache& operator=(KMSCache&&) = delete; + KMSCache(CephContext* _cct, std::unique_ptr _keyring); + virtual ~KMSCache(); + + // Initialize a TTL reaper. Either threaded or async depending on + // the executor parameter + void initialize_ttl_reaper( + std::optional executor); + + // Stop any type of TTL reaper running + void stop_ttl_reaper(); + + [[nodiscard]] bool reaper_initialized() const noexcept { + return !std::holds_alternative(reaper_state); + } + + static std::jthread make_ttl_reaper_thread( + CephContext* cct, KMSSecretCache& cache, std::chrono::seconds ttl); + + static std::future make_ttl_reaper_async(CephContext* cct, + KMSSecretCache& cache, std::chrono::seconds ttl, + const boost::asio::strand& strand, + boost::asio::cancellation_signal& cancel_signal); + + void clear_cache() const; + + // Retrieve a cached KMS secret or fetch, cache, and return a KMS + // secret. + // + // The secret is returned via actual_key and it is up to the caller + // to properly clear the secret from memory. The cache may be shared + // between SSE-KMS and SSE-S3, where key_kms_prefix is available to + // partition the key namespace. + int do_cache( + const DoutPrefixProvider* dpp, const std::string& key_kms_prefix, + const std::string& key_id, const FetchFn& fetch, std::string& actual_key, + optional_yield y); + + void disable_cache() { cct->_conf->rgw_crypt_s3_kms_cache_enabled = false; } +}; +} // namespace rgw::kms diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc index 5c5c1487934..ebdff90c333 100644 --- a/src/rgw/rgw_main.cc +++ b/src/rgw/rgw_main.cc @@ -165,6 +165,7 @@ int main(int argc, char *argv[]) main.init_opslog(); main.init_tracepoints(); main.init_lua(); + main.init_kms_cache(); #ifdef WITH_RADOSGW_RADOS main.init_dedup(); #endif diff --git a/src/rgw/rgw_main.h b/src/rgw/rgw_main.h index 95eb92509b1..66f4c932af7 100644 --- a/src/rgw/rgw_main.h +++ b/src/rgw/rgw_main.h @@ -138,6 +138,7 @@ public: int init_frontends2(RGWLib* rgwlib = nullptr); void init_tracepoints(); void init_lua(); + void init_kms_cache(); #ifdef WITH_RADOSGW_RADOS void init_dedup(); #endif diff --git a/src/rgw/rgw_perf_counters.cc b/src/rgw/rgw_perf_counters.cc index 6019e449977..ae9c5776f3b 100644 --- a/src/rgw/rgw_perf_counters.cc +++ b/src/rgw/rgw_perf_counters.cc @@ -63,6 +63,11 @@ void add_rgw_frontend_counters(PerfCountersBuilder *pcb) { pcb->add_u64_counter(l_rgw_d4n_cache_hits, "d4n_cache_hits", "D4N cache hits"); pcb->add_u64_counter(l_rgw_d4n_cache_misses, "d4n_cache_misses", "D4N cache misses"); pcb->add_u64_counter(l_rgw_d4n_cache_evictions, "d4n_cache_evictions", "D4N cache evictions"); + + pcb->add_time_avg(l_rgw_kms_fetch_lat, "kms_fetch_lat", "Uncached KMS secret fetch latency"); + pcb->add_u64_counter(l_rgw_kms_error_permanent, "kms_error_permanent", "Permanent (e.g key not found) errors returned from KMS"); + pcb->add_u64_counter(l_rgw_kms_error_transient, "kms_error_transient", "Trainsient (e.g timeout, overloaded) errors returned from KMS"); + pcb->add_u64_counter(l_rgw_kms_error_secret_store, "kms_error_secret_store", "Secrt store errors (e.g kernel keyring quota)"); } void add_rgw_op_counters(PerfCountersBuilder *lpcb) { diff --git a/src/rgw/rgw_perf_counters.h b/src/rgw/rgw_perf_counters.h index e93b4fe36f9..85e515804c8 100644 --- a/src/rgw/rgw_perf_counters.h +++ b/src/rgw/rgw_perf_counters.h @@ -53,6 +53,10 @@ enum { l_rgw_d4n_cache_misses, l_rgw_d4n_cache_evictions, + l_rgw_kms_fetch_lat, + l_rgw_kms_error_transient, + l_rgw_kms_error_permanent, + l_rgw_kms_error_secret_store, l_rgw_last, }; diff --git a/src/rgw/rgw_process_env.h b/src/rgw/rgw_process_env.h index 9fbaf8d38a2..c40e6f2d67a 100644 --- a/src/rgw/rgw_process_env.h +++ b/src/rgw/rgw_process_env.h @@ -6,6 +6,7 @@ #include #include "rgw_auth_registry.h" +#include "rgw_kms_cache.h" class ActiveRateLimiter; class OpsLogSink; @@ -50,6 +51,7 @@ struct RGWProcessEnv { std::unique_ptr olog; std::unique_ptr auth_registry; ActiveRateLimiter* ratelimiting = nullptr; + std::unique_ptr kms_cache; #ifdef WITH_ARROW_FLIGHT // managed by rgw:flight::FlightFrontend in rgw_flight_frontend.cc diff --git a/src/test/rgw/CMakeLists.txt b/src/test/rgw/CMakeLists.txt index c587b5834dc..acc7c7f9634 100644 --- a/src/test/rgw/CMakeLists.txt +++ b/src/test/rgw/CMakeLists.txt @@ -335,6 +335,13 @@ target_include_directories(unittest_rgw_kms SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw") target_link_libraries(unittest_rgw_kms ${rgw_libs}) +# unittest_rgw_kms_cache +add_executable(unittest_rgw_kms_cache test_rgw_kms_cache.cc) +add_ceph_unittest(unittest_rgw_kms_cache) +target_include_directories(unittest_rgw_kms_cache + SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw") +target_link_libraries(unittest_rgw_kms_cache ${rgw_libs}) + # unittest_rgw_url add_executable(unittest_rgw_url test_rgw_url.cc) add_ceph_unittest(unittest_rgw_url) diff --git a/src/test/rgw/test_rgw_kms_cache.cc b/src/test/rgw/test_rgw_kms_cache.cc new file mode 100644 index 00000000000..15004a02e5f --- /dev/null +++ b/src/test/rgw/test_rgw_kms_cache.cc @@ -0,0 +1,368 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/web_cache.h" +#include "rgw_common.h" +#include "rgw_kms.h" +#include "rgw_kms_cache.h" +#include "rgw_perf_counters.h" + +class FakeKeyringSecret : public KeyringSecret { + public: + std::string key; + std::string val; + + FakeKeyringSecret(std::string _key, std::string _val) + : key(std::move(_key)), val(std::move(_val)) {}; + + std::error_code read(std::string& out) const override { + out = val; + return {}; + } + std::error_code remove() const override { return {}; } + bool initialized() const override { return true; } +}; + +class FakeKeyring : public Keyring { + public: + tl::expected, std::error_code> add( + const std::string& key, const std::string& val) noexcept override { + return std::make_unique(key, val); + } + bool supported(std::error_code* ec) noexcept override { return true; } + std::string_view name() const noexcept override { return "fake"; }; +}; + +class TestKMSCacheReaperLifecycle : public ::testing::Test, + public rgw::kms::KMSCache { + public: + TestKMSCacheReaperLifecycle() + : rgw::kms::KMSCache(g_ceph_context, std::make_unique()) {}; +}; + +static void rethrow(const std::exception_ptr& eptr) { + if (eptr) { + std::rethrow_exception(eptr); + } +} + +TEST_F(TestKMSCacheReaperLifecycle, Threaded) { + initialize_ttl_reaper(std::nullopt); + EXPECT_TRUE(reaper_initialized()); + EXPECT_TRUE(std::holds_alternative(reaper_state)); + + stop_ttl_reaper(); + EXPECT_FALSE(reaper_initialized()); + EXPECT_FALSE(std::holds_alternative(reaper_state)); +} + +TEST_F(TestKMSCacheReaperLifecycle, NoInit) { + EXPECT_FALSE(reaper_initialized()); + EXPECT_TRUE(std::holds_alternative(reaper_state)); + this->stop_ttl_reaper(); + EXPECT_FALSE(reaper_initialized()); + EXPECT_TRUE(std::holds_alternative(reaper_state)); +} + +TEST_F(TestKMSCacheReaperLifecycle, Async) { + boost::asio::io_context io; + auto work = boost::asio::make_work_guard(io); + std::jthread io_thread([&]() { + io.run(); + }); + auto drain = [&] { + boost::asio::post(io, boost::asio::use_future).get(); + }; + this->initialize_ttl_reaper(io.get_executor()); + drain(); + EXPECT_TRUE(reaper_initialized()); + EXPECT_TRUE( + std::holds_alternative(reaper_state)); + stop_ttl_reaper(); + drain(); + work.reset(); + io.restart(); + EXPECT_EQ(io.run(), 0); +} + +class TestKMSCache : public ::testing::Test { + protected: + CephContext* cct = g_ceph_context; + const NoDoutPrefix no_dpp{cct, ceph_subsys_rgw}; + + public: + std::unique_ptr uut = + std::make_unique( + cct, std::make_unique()); + + void TearDown() override { perfcounter->reset(); } +}; + +TEST_F(TestKMSCache, TransientFetchError) { + std::string tmp; + ASSERT_EQ( + uut->do_cache( + &no_dpp, "testing", "foo", [](std::string&) { return -EINVAL; }, tmp, + null_yield), + -EINVAL); + ASSERT_EQ(perfcounter->get(l_rgw_kms_error_transient), 1); + ASSERT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 0); + ASSERT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0); + ASSERT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled); +} + +TEST_F(TestKMSCache, PermanentFetchError) { + std::string tmp; + ASSERT_EQ( + uut->do_cache( + &no_dpp, "testing", "foo", [](std::string&) { return -ENOENT; }, tmp, + null_yield), + -ENOENT); + ASSERT_EQ(perfcounter->get(l_rgw_kms_error_transient), 0); + ASSERT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 1); + ASSERT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0); + ASSERT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled); +} + +TEST_F(TestKMSCache, Cache) { + const std::string test_key = "37u481923789123u72189ou3jsdf978of"; + std::string_view test_val = "dlksafj3029jfmsjf8322ty7nghb67435"; + std::string cache_return; + ASSERT_EQ( + uut->do_cache( + &no_dpp, "testing", test_key, + [&](std::string& val) { + val = test_val; + return 0; + }, + cache_return, null_yield), + 0); + EXPECT_EQ(perfcounter->get(l_rgw_kms_error_transient), 0); + EXPECT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 0); + EXPECT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0); + EXPECT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled); + ASSERT_EQ(cache_return, test_val); + + cache_return.clear(); + ASSERT_EQ( + uut->do_cache( + &no_dpp, "testing", test_key, + [&](std::string&) -> int { + EXPECT_TRUE(false) << "fetch must not be called"; + return -2342; + }, + cache_return, null_yield), + 0); + EXPECT_EQ(perfcounter->get(l_rgw_kms_error_transient), 0); + EXPECT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 0); + EXPECT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0); + EXPECT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled); + ASSERT_EQ(cache_return, test_val); +} + +class TestSSEKMSWithTestingKMS : public ::testing::Test { + protected: + CephContext* cct = g_ceph_context; + const NoDoutPrefix no_dpp{cct, ceph_subsys_rgw}; + std::map attrs = { + {RGW_ATTR_CRYPT_KEYID, + []() { + bufferlist bl; + bl.append("foo"); + return bl; + }()}, + {RGW_ATTR_CRYPT_KEYSEL, []() { + // AES_ECB(32*"#").decrypt(32*"*") + bufferlist bl; + bl.append( + "\xc6\xb1/\x12\xdc\xf7" + "e" + "\xe3;\xea\x14\xa4x\x1f" + "bX" + "\xc6\xb1/\x12\xdc\xf7" + "e" + "\xe3;\xea\x14\xa4x\x1f" + "bX"); + return bl; + }()}}; + rgw::kms::KMSCache* uut; + PerfCounters* cache_perf = nullptr; + + void SetUp() override { + // Simulate RGW app KMSCache livecyle. A single instance started + // during app init and may be disabled via config. + uut = &cct->lookup_or_create_singleton_object( + "TestSSEKMSWithTestingKMS::kms-cache", false, cct, + std::make_unique()); + cct->get_perfcounters_collection()->with_counters( + [&](const PerfCountersCollectionImpl::CounterMap& by_path) { + for (const auto& i : by_path) { + const auto& perf_counters = i.second.perf_counters; + if (perf_counters->get_name() == "kms-cache") { + cache_perf = perf_counters; + return; + } + } + }); + + ASSERT_NE(perfcounter, nullptr); + ASSERT_NE(cache_perf, nullptr); + } + + void TearDown() override { + JSONFormatter f(true); + cache_perf->dump_formatted(&f, false, select_labeled_t::labeled); + f.flush(std::cout); + cct->get_perfcounters_collection()->with_counters( + [&](const PerfCountersCollectionImpl::CounterMap& by_path) { + for (const auto& i : by_path) { + const auto& perf_counters = i.second.perf_counters; + if (perf_counters->get_name() == "rgw") { + auto [sum, count] = + perf_counters->get_tavg_ns(l_rgw_kms_fetch_lat); + fmt::println( + std::cout, + "RGW KMS perf counters: err_trans={} err_perm={} err_sec={} " + "avg_fetch_lat={} fetch_cnt={}", + perf_counters->get(l_rgw_kms_error_transient), + perf_counters->get(l_rgw_kms_error_permanent), + perf_counters->get(l_rgw_kms_error_secret_store), + std::chrono::nanoseconds(sum / count), count); + return; + } + } + }); + uut->clear_cache(); + } + + void test_do_reconstitue() { + std::string actual_key; + const int ret = reconstitute_actual_key_from_kms( + &no_dpp, attrs, uut, null_yield, actual_key); + ASSERT_EQ(ret, 0); + ASSERT_EQ(actual_key, "********************************"); + ASSERT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0); + ASSERT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 0); + ASSERT_EQ(perfcounter->get(l_rgw_kms_error_transient), 0); + } +}; + +TEST_F( + TestSSEKMSWithTestingKMS, TestReconstituteActualKeyFromKMSBasicsDefault) { + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 0); + ASSERT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled); + test_do_reconstitue(); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::hit)), 0); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::miss)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::size)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::clear)), 0); + EXPECT_EQ( + cache_perf->get(static_cast(webcache::Metric::capacity)), + cct->_conf->rgw_crypt_s3_kms_cache_max_size); +} + +TEST_F( + TestSSEKMSWithTestingKMS, + TestReconstituteActualKeyFromKMSBasicsWithoutCache) { + cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "false"); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 1); + test_do_reconstitue(); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::hit)), 0); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::miss)), 0); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::size)), 0); + ASSERT_FALSE(cct->_conf->rgw_crypt_s3_kms_cache_enabled); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 2); +} + +TEST_F( + TestSSEKMSWithTestingKMS, TestReconstituteActualKeyFromKMSBasicsWithCache) { + ASSERT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 2); + + cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "true"); + test_do_reconstitue(); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 3); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::hit)), 0); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::miss)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::size)), 1); + + test_do_reconstitue(); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 3); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::hit)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::miss)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::size)), 1); + + test_do_reconstitue(); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 3); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::hit)), 2); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::miss)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::size)), 1); +} + +TEST_F(TestSSEKMSWithTestingKMS, TestRuntimeEnableDisable) { + ASSERT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 3); + + cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "true"); + test_do_reconstitue(); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 4); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::hit)), 0); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::miss)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::size)), 1); + + cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "false"); + test_do_reconstitue(); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 5); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::hit)), 0); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::miss)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::size)), 1); + + cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "true"); + test_do_reconstitue(); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 5); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::hit)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::miss)), 1); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::size)), 1); + + cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "false"); + uut->clear_cache(); + test_do_reconstitue(); + EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 6); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::hit)), 0); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::miss)), 0); + EXPECT_EQ(cache_perf->get(static_cast(webcache::Metric::size)), 0); +} + +int main(int argc, char** argv) { + auto args = argv_to_vec(argc, argv); + std::map defaults{ + {"rgw_crypt_s3_kms_backend", RGW_SSE_KMS_BACKEND_TESTING}, + {"rgw_crypt_s3_kms_encryption_keys", + "foo=IyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyM="}, + {"debug_rgw", "20"}}; + auto cct = global_init( + &defaults, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + rgw_perf_start(g_ceph_context); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}