]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: SSE-KMS Secrets Cache
authorMarcel Lauhoff <marcel.lauhoff@clyso.com>
Thu, 19 Dec 2024 14:41:30 +0000 (15:41 +0100)
committerMarcel Lauhoff <marcel.lauhoff@clyso.com>
Mon, 1 Jun 2026 17:04:10 +0000 (19:04 +0200)
Add SSE Key Management System secrets cache to RGW.

It is common to have secrets shared by many if not all objects in a
bucket. Without RGW-side caching every PUT/GET will cause a request to
an external KSM. This not only adds load to the KSM, but also slows
down read and writes.

Combine WebCache, ceph::async::call_once and LinuxKeyringSecret into
KMSCache. WebCache stores async::once_result to wrap results of a KMS
secret fetch to mitigate cache stampedes (concurrent cache requests to
the same key coalesce into one). The retrieved secrets are stored in
the Linux kernel key retention service (LinuxKeyringSecret) for safe
keeping and retrial by subsequent requests. KMSCache adds a TTL reaper
and life cycle.

Cache values and error handling: The cache stores positive
fetch results, permanent errors (e.g key does not exists) and
transient errors (e.g fetch timeout). Each with a different TTL.

Unit tests to cover cached / uncached KMS retrieve and runtime cache
disable via config.

Add perf counter `kms_fetch_lat` to track KMS fetch request latency
and error counters to track permanent, transient and key store
errors.

Signed-off-by: Marcel Lauhoff <marcel.lauhoff@clyso.com>
Fixes: https://tracker.ceph.com/issues/68524
On-behalf-of: SAP marcel.lauhoff@sap.com

15 files changed:
src/common/options/rgw.yaml.in
src/rgw/CMakeLists.txt
src/rgw/rgw_appmain.cc
src/rgw/rgw_crypt.cc
src/rgw/rgw_kms.cc
src/rgw/rgw_kms.h
src/rgw/rgw_kms_cache.cc [new file with mode: 0644]
src/rgw/rgw_kms_cache.h [new file with mode: 0644]
src/rgw/rgw_main.cc
src/rgw/rgw_main.h
src/rgw/rgw_perf_counters.cc
src/rgw/rgw_perf_counters.h
src/rgw/rgw_process_env.h
src/test/rgw/CMakeLists.txt
src/test/rgw/test_rgw_kms_cache.cc [new file with mode: 0644]

index dfe1b8a76d38f4fce6678178fcce3eedb58106be..9c96127a804c3c7c1335fb00c4d893111fa12abb 100644 (file)
@@ -3106,12 +3106,80 @@ options:
   with_legacy: true
 # extra keys that may be used for aws:kms
 # defined as map "key1=YmluCmJvb3N0CmJvb3N0LQ== key2=b3V0CnNyYwpUZXN0aW5nCg=="
+- name: rgw_crypt_s3_kms_cache_enabled
+  type: bool
+  level: advanced
+  desc: Enable caching of encryption keys for SSE-KMS.
+  default: true
+  with_legacy: true
+  services:
+  - rgw
+  see_also:
+  - rgw_crypt_s3_kms_cache_max_size
+  - rgw_crypt_s3_kms_cache_positive_ttl
+  - rgw_crypt_s3_kms_cache_transient_error_ttl
+  - rgw_crypt_s3_kms_cache_negative_ttl
+- name: rgw_crypt_s3_kms_cache_max_size
+  type: uint
+  level: advanced
+  desc: Maximum number of SSE-KMS secrets cached.
+    Each key consumes 32 byte (AES-256) + overhead in memory
+  default: 128
+  with_legacy: true
+  services:
+  - rgw
+  see_also:
+  - rgw_crypt_s3_kms_cache_enabled
+- name: rgw_crypt_s3_kms_cache_positive_ttl
+  type: uint
+  level: advanced
+  desc: Time in seconds after which the KMS cache evicts successful lookups.
+  services:
+  - rgw
+  with_legacy: true
+  default: 60
+  min: 10
+  max: 3600
+  see_also:
+  - rgw_crypt_s3_kms_cache_enabled
 - name: rgw_crypt_s3_kms_encryption_keys
   type: str
   level: dev
   services:
   - rgw
   with_legacy: true
+- name: rgw_crypt_s3_kms_cache_transient_error_ttl
+  type: uint
+  level: advanced
+  desc: Time in seconds after which the KMS cache evicts entries representing transient errors (timeouts, temporary outages, misconfiguration, etc).
+  services:
+  - rgw
+  with_legacy: true
+  default: 10
+  min: 0
+  max: 3600
+  see_also:
+  - rgw_crypt_s3_kms_cache_enabled
+- name: rgw_crypt_s3_kms_cache_negative_ttl
+  type: uint
+  level: advanced
+  desc: Time in seconds after which the KMS cache evicts permanent look-up errors (e.g key does not exist).
+  services:
+  - rgw
+  with_legacy: true
+  default: 120
+  min: 0
+  max: 3600
+  see_also:
+  - rgw_crypt_s3_kms_cache_enabled
+- name: rgw_crypt_s3_kms_testing_delay
+  type: uint
+  level: dev
+  desc: Add delay in milliseconds to the 'testing' KMS key retrieval.
+  services:
+  - rgw
+  with_legacy: true
+  default: 0
 - name: rgw_crypt_vault_auth
   type: str
   level: advanced
index 3d9db673db4284df965cf94860a782f0ac8eeca7..b706913231682df3fc4d72b806a87fc39d33c374 100644 (file)
@@ -131,6 +131,7 @@ set(librgw_common_srcs
   rgw_rest_iam.cc
   rgw_object_lock.cc
   rgw_kms.cc
+  rgw_kms_cache.cc
   rgw_kmip_client.cc
   rgw_url.cc
   rgw_oidc_provider.cc
index 16a79f21d528b23cdb16a13ae21dd69c3b056f1b..9c90863419a177d7a9578ab9da0bb2b01f890a31 100644 (file)
@@ -26,6 +26,7 @@
 #include "include/compat.h"
 #include "include/str_list.h"
 #include "include/stringify.h"
+#include "rgw_kms_cache.h"
 #include "rgw_main.h"
 #include "rgw_asio_thread.h"
 #include "rgw_common.h"
@@ -463,6 +464,12 @@ int rgw::AppMain::init_frontends2(RGWLib* rgwlib)
     }
     else if (framework == "beast") {
       fe = new RGWAsioFrontend(env, config, *sched_ctx, context_pool_holder.get());
+      if (g_conf()->rgw_crypt_s3_kms_cache_enabled) {
+        env.kms_cache->initialize_ttl_reaper(
+            g_conf()->rgw_beast_enable_async
+            ? std::optional(context_pool_holder.get().get_executor())
+                : nullopt);
+      }
     }
     else if (framework == "rgw-nfs") {
       fe = new RGWLibFrontend(env, config);
@@ -598,6 +605,15 @@ void rgw::AppMain::init_dedup()
 }
 #endif
 
+void rgw::AppMain::init_kms_cache()
+{
+  if (!g_conf().get_val<bool>("rgw_crypt_s3_kms_cache_enabled")) {
+    return;
+  }
+  env.kms_cache = std::make_unique<rgw::kms::KMSCache>(
+      dpp->get_cct(), Keyring::get_best());
+}
+
 void rgw::AppMain::shutdown(std::function<void(void)> finalize_async_signals)
 {
   // stop the realm reloader
@@ -618,6 +634,8 @@ void rgw::AppMain::shutdown(std::function<void(void)> finalize_async_signals)
   }
 #endif
 
+  env.kms_cache.reset();
+
   for (auto& fe : fes) {
     fe->stop();
   }
index f671e3ddd88a4a6392540c484ffd2723b80701c2..e572779af3a6d01be9428b5c058baea78a1561be 100644 (file)
@@ -20,6 +20,7 @@
 #include "crypto/crypto_plugin.h"
 #include "rgw/rgw_kms.h"
 #include "rgw_range_projection.h"
+#include "rgw/rgw_process_env.h"
 #include "rapidjson/document.h"
 #include "rapidjson/writer.h"
 #include "rapidjson/error/error.h"
@@ -2195,7 +2196,7 @@ int rgw_s3_prepare_encrypt(req_state* s, optional_yield y,
         set_attr(attrs, RGW_ATTR_CRYPT_KEYID, key_id);
         set_attr(attrs, RGW_ATTR_CRYPT_CONTEXT, cooked_context);
         std::string actual_key;
-        res = make_actual_key_from_kms(s, attrs, y, actual_key);
+        res = make_actual_key_from_kms(s, attrs, s->penv.kms_cache.get(), y, actual_key);
         if (res != 0) {
           ldpp_dout(s, 5) << "ERROR: failed to retrieve actual key from key_id: " << key_id << dendl;
           s->err.message = "Failed to retrieve the actual key, kms-keyid: " + std::string(key_id);
@@ -2694,7 +2695,8 @@ int rgw_s3_prepare_decrypt(req_state* s, optional_yield y,
     /* try to retrieve actual key */
     std::string key_id = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYID);
     std::string actual_key;
-    res = reconstitute_actual_key_from_kms(s, attrs, y, actual_key);
+
+    res = reconstitute_actual_key_from_kms(s, attrs, s->penv.kms_cache.get(), y, actual_key);
     if (res != 0) {
       ldpp_dout(s, 10) << "ERROR: failed to retrieve actual key from key_id: " << key_id << dendl;
       s->err.message = "Failed to retrieve the actual key, kms-keyid: " + key_id;
@@ -2729,7 +2731,7 @@ int rgw_s3_prepare_decrypt(req_state* s, optional_yield y,
     /* try to retrieve actual key */
     std::string key_id = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYID);
     std::string actual_key;
-    res = reconstitute_actual_key_from_kms(s, attrs, y, actual_key);
+    res = reconstitute_actual_key_from_kms(s, attrs, s->penv.kms_cache.get(), y, actual_key);
     if (res != 0) {
       ldpp_dout(s, 10) << "ERROR: failed to retrieve actual key from key_id: " << key_id << dendl;
       s->err.message = "Failed to retrieve the actual key, kms-keyid: " + key_id;
index e089b4ec25faceff4d8e4d7e61efb2369757935d..a691290c05430691188e70e85a8d1b14c49d0595 100644 (file)
@@ -13,6 +13,9 @@
 #include "rgw/rgw_b64.h"
 #include "rgw/rgw_kms.h"
 #include "rgw/rgw_kmip_client.h"
+#include "rgw/rgw_perf_counters.h"
+#include "rgw_kms_cache.h"
+#include "rgw_string.h"
 #include <rapidjson/allocators.h>
 #include <rapidjson/document.h>
 #include <rapidjson/writer.h>
@@ -1180,41 +1183,70 @@ public:
   };
 };
 
-int reconstitute_actual_key_from_kms(const DoutPrefixProvider *dpp,
-                                     map<string, bufferlist>& attrs,
-                                     optional_yield y,
-                                     std::string& actual_key)
-{
+static int maybe_cache_kms_fetch(
+    const DoutPrefixProvider* dpp, const std::string& cache_prefix,
+    const std::string& key_id, rgw::kms::KMSCache* kms_cache,
+    const kms::KMSCache::FetchFn& fetch, std::string& actual_key,
+    optional_yield y) {
+  if (kms_cache == nullptr ||
+      !dpp->get_cct()->_conf->rgw_crypt_s3_kms_cache_enabled) {
+    const auto ret = fetch(actual_key);
+    if (ret == -ENOENT) {
+      perfcounter->inc(l_rgw_kms_error_permanent);
+    } else if (ret < 0) {
+      perfcounter->inc(l_rgw_kms_error_transient);
+    }
+    return ret;
+  }
+  return kms_cache->do_cache(dpp, cache_prefix, key_id, fetch, actual_key, y);
+}
+
+int reconstitute_actual_key_from_kms(
+    const DoutPrefixProvider* dpp, map<string, bufferlist>& attrs,
+    rgw::kms::KMSCache* kms_cache, optional_yield y, std::string& actual_key) {
   std::string key_id = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYID);
-  KMSContext kctx { dpp->get_cct() };
-  const std::string &kms_backend { kctx.backend() };
+  KMSContext kctx{dpp->get_cct()};
+  const std::string& kms_backend{kctx.backend()};
 
-  ldpp_dout(dpp, 20) << "Getting KMS encryption key for key " << key_id << dendl;
+  ldpp_dout(dpp, 20) << "Getting KMS encryption key for key " << key_id
+                     << dendl;
   ldpp_dout(dpp, 20) << "SSE-KMS backend is " << kms_backend << dendl;
 
-  if (RGW_SSE_KMS_BACKEND_BARBICAN == kms_backend) {
-    return get_actual_key_from_barbican(dpp, key_id, y, actual_key);
-  }
-
-  if (RGW_SSE_KMS_BACKEND_VAULT == kms_backend) {
-    return reconstitute_actual_key_from_vault(dpp, kctx, attrs, y, actual_key);
-  }
+  const auto fetch = [&](std::string& out_secret) -> int {
+    PerfGuard perf(perfcounter, l_rgw_kms_fetch_lat);
+    if (RGW_SSE_KMS_BACKEND_BARBICAN == kms_backend) {
+      return get_actual_key_from_barbican(dpp, key_id, y, out_secret);
+    }
 
-  if (RGW_SSE_KMS_BACKEND_KMIP == kms_backend) {
-    return get_actual_key_from_kmip(dpp, key_id, y, actual_key);
-  }
+    if (RGW_SSE_KMS_BACKEND_VAULT == kms_backend) {
+      return reconstitute_actual_key_from_vault(
+          dpp, kctx, attrs, y, out_secret);
+    }
 
-  if (RGW_SSE_KMS_BACKEND_TESTING == kms_backend) {
-    std::string key_selector = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYSEL);
-    return get_actual_key_from_conf(dpp, key_id, key_selector, actual_key);
-  }
+    if (RGW_SSE_KMS_BACKEND_KMIP == kms_backend) {
+      return get_actual_key_from_kmip(dpp, key_id, y, out_secret);
+    }
 
-  ldpp_dout(dpp, 0) << "ERROR: Invalid rgw_crypt_s3_kms_backend: " << kms_backend << dendl;
-  return -EINVAL;
+    if (RGW_SSE_KMS_BACKEND_TESTING == kms_backend) {
+      std::string key_selector =
+          get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYSEL);
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(
+              dpp->get_cct()->_conf->rgw_crypt_s3_kms_testing_delay));
+      return get_actual_key_from_conf(dpp, key_id, key_selector, out_secret);
+    }
+    ldpp_dout(dpp, 0) << "ERROR: Invalid rgw_crypt_s3_kms_backend: "
+                      << kms_backend << dendl;
+    return -EINVAL;
+  };
+  const std::string cache_prefix = string_cat_reserve("kms_", kms_backend);
+  return maybe_cache_kms_fetch(
+      dpp, cache_prefix, key_id, kms_cache, fetch, actual_key, y);
 }
 
 int make_actual_key_from_kms(const DoutPrefixProvider *dpp,
                              map<string, bufferlist>& attrs,
+                             rgw::kms::KMSCache* kms_cache,
                              optional_yield y,
                              std::string& actual_key)
 {
@@ -1222,7 +1254,7 @@ int make_actual_key_from_kms(const DoutPrefixProvider *dpp,
   const std::string &kms_backend { kctx.backend() };
   if (RGW_SSE_KMS_BACKEND_VAULT == kms_backend)
     return make_actual_key_from_vault(dpp, kctx, attrs, y, actual_key);
-  return reconstitute_actual_key_from_kms(dpp, attrs, y, actual_key);
+  return reconstitute_actual_key_from_kms(dpp, attrs, kms_cache, y, actual_key);
 }
 
 int reconstitute_actual_key_from_sse_s3(const DoutPrefixProvider *dpp,
index b0c29faa5ebbf8ff55b850b4c6a2bdf0f1e3d4eb..a2eccea19ee28010018b8d1d4727b7379ba88bff 100644 (file)
@@ -22,6 +22,10 @@ static const std::string RGW_SSE_KMS_VAULT_SE_KV = "kv";
 
 static const std::string RGW_SSE_KMS_KMIP_SE_KV = "kv";
 
+namespace rgw::kms {
+  class KMSCache;
+}
+
 /**
  * Retrieves the actual server-side encryption key from a KMS system given a
  * key ID. Currently supported KMS systems are OpenStack Barbican and HashiCorp
@@ -34,10 +38,12 @@ static const std::string RGW_SSE_KMS_KMIP_SE_KV = "kv";
  */
 int make_actual_key_from_kms(const DoutPrefixProvider *dpp,
                              std::map<std::string, bufferlist>& attrs,
+                             rgw::kms::KMSCache* kms_cache,
                              optional_yield y,
                              std::string& actual_key);
 int reconstitute_actual_key_from_kms(const DoutPrefixProvider *dpp,
                                      std::map<std::string, bufferlist>& attrs,
+                                     rgw::kms::KMSCache* kms_cache,
                                      optional_yield y,
                                      std::string& actual_key);
 int make_actual_key_from_sse_s3(const DoutPrefixProvider *dpp,
diff --git a/src/rgw/rgw_kms_cache.cc b/src/rgw/rgw_kms_cache.cc
new file mode 100644 (file)
index 0000000..76d5aef
--- /dev/null
@@ -0,0 +1,256 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2025 Clyso GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "rgw_kms_cache.h"
+
+#include <boost/asio/associated_executor.hpp>
+#include <boost/asio/bind_cancellation_slot.hpp>
+#include <boost/asio/bind_executor.hpp>
+#include <boost/asio/cancellation_signal.hpp>
+#include <boost/asio/cancellation_type.hpp>
+#include <boost/asio/detached.hpp>
+#include <boost/asio/use_future.hpp>
+#include <boost/asio/error.hpp>
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/spawn.hpp>
+#include <boost/asio/steady_timer.hpp>
+#include <boost/asio/strand.hpp>
+#include <utility>
+#include <variant>
+
+#include "common/async/yield_context.h"
+#include "common/dout.h"
+#include "common/dout_fmt.h"
+#include "common/keyring.h"
+#include "include/ceph_assert.h"
+#include "include/function2.hpp"
+#include "rgw_perf_counters.h"
+#include "rgw_string.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys  ceph_subsys_rgw
+
+namespace rgw::kms {
+
+std::jthread KMSCache::make_ttl_reaper_thread(
+    CephContext* cct, KMSSecretCache& cache, std::chrono::seconds ttl) {
+  return std::jthread([cct, &cache, ttl](std::stop_token stop) {
+    const std::string thread_name = fmt::format("{}-ttl-reaper", cache.name());
+    ceph_pthread_setname(thread_name.c_str());
+    std::mutex mutex;
+    std::condition_variable cond;
+    std::stop_callback on_stop(stop, [&cond]() { cond.notify_all(); });
+    ldout(cct, 10) << "KMS Cache: Starting TTL reaper thread " << thread_name
+                   << "/" << std::hex << std::this_thread::get_id() << std::dec
+                   << ", running every " << ttl << dendl;
+    while (!stop.stop_requested()) {
+      std::unique_lock<std::mutex> lock(mutex);
+      if (cond.wait_for(lock, ttl, [&stop] { return stop.stop_requested(); })) {
+        break;
+      }
+      const auto expired_count = cache.expire_erase();
+      ldout(cct, 20) << "KMS Cache: TTL reaper thread expired " << expired_count
+                     << " entries" << dendl;
+    }
+    ldout(cct, 10) << "KMS Cache: Stopping TTL reaper thread " << thread_name
+                   << "/" << std::hex << std::this_thread::get_id() << std::dec
+                   << dendl;
+  });
+}
+
+std::future<void> KMSCache::make_ttl_reaper_async(
+    CephContext* cct, KMSSecretCache& cache, std::chrono::seconds ttl,
+    const boost::asio::strand<boost::asio::io_context::executor_type>& strand,
+    boost::asio::cancellation_signal& cancel_signal) {
+  return boost::asio::spawn(
+      strand,
+      [cct, &cache, ttl](const boost::asio::yield_context& yield) {
+        ldout(cct, 10) << "KMS Cache: Starting async TTL reaper, running every "
+                       << ttl << dendl;
+        boost::asio::steady_timer timer(yield.get_executor());
+        while (true) {
+          timer.expires_after(ttl);
+          boost::system::error_code ec;
+          timer.async_wait(yield[ec]);
+          if (ec == boost::asio::error::operation_aborted) {
+            ldout(cct, 10) << "KMS Cache: Stopping async TTL reaper" << dendl;
+            break;
+          }
+          const auto expired_count = cache.expire_erase();
+          ldout(cct, 20) << "KMS Cache: Async TTL reaper expired "
+                         << expired_count << " entries" << dendl;
+        }
+      },
+      boost::asio::bind_cancellation_slot(
+          cancel_signal.slot(),
+          boost::asio::bind_executor(strand, boost::asio::use_future)));
+}
+
+KMSCache::KMSCache(CephContext* _cct, std::unique_ptr<Keyring> _keyring)
+    : cct(_cct), keyring(std::move(_keyring)) {
+  ldout(cct, 10) << fmt::format(
+                        "KMS Cache: Initializing size:{} TTL pos:{} "
+                        "neg:{} err:{}",
+                        cct->_conf->rgw_crypt_s3_kms_cache_max_size,
+                        cct->_conf->rgw_crypt_s3_kms_cache_positive_ttl,
+                        cct->_conf->rgw_crypt_s3_kms_cache_negative_ttl,
+                        cct->_conf->rgw_crypt_s3_kms_cache_transient_error_ttl)
+                 << dendl;
+  cache = std::make_unique<KMSSecretCache>(
+      cct, "kms-cache", cct->_conf->rgw_crypt_s3_kms_cache_max_size,
+      std::chrono::seconds(cct->_conf->rgw_crypt_s3_kms_cache_positive_ttl));
+
+  std::error_code ec;
+  if (!keyring->supported(&ec)) {
+    ldout(cct, 1) << "KMS Cache: " << keyring->name() << " unsupported (error "
+                  << ec << "). Disabling Cache." << dendl;
+    cct->_conf->rgw_crypt_s3_kms_cache_enabled = false;
+  }
+}
+
+KMSCache::~KMSCache() {
+  stop_ttl_reaper();
+}
+
+void KMSCache::initialize_ttl_reaper(
+    std::optional<boost::asio::io_context::executor_type> executor) {
+  ceph_assert(cache);
+  if (reaper_initialized()) {
+    return;
+  }
+  const auto min_ttl_secs = std::min(
+      {cct->_conf->rgw_crypt_s3_kms_cache_positive_ttl,
+       cct->_conf->rgw_crypt_s3_kms_cache_negative_ttl,
+       cct->_conf->rgw_crypt_s3_kms_cache_transient_error_ttl});
+  if (executor.has_value()) {
+    auto& state = reaper_state.emplace<AsyncState>(executor.value());
+    state.done = make_ttl_reaper_async(
+        cct, *cache, std::chrono::seconds(min_ttl_secs),
+        state.strand, state.cancel_signal);
+  } else {
+    reaper_state.emplace<std::jthread>(make_ttl_reaper_thread(
+        cct, *cache, std::chrono::seconds(min_ttl_secs)));
+  }
+}
+
+void KMSCache::stop_ttl_reaper() {
+  ceph_assert(cache);
+  std::visit(
+      fu2::overload(
+          [](const std::monostate& mono) {},
+          [](AsyncState& async_state) {
+            boost::asio::dispatch(
+                async_state.strand,
+                [&signal = async_state.cancel_signal]() {
+                  signal.emit(boost::asio::cancellation_type::terminal);
+            });
+            try {
+              async_state.done.wait();
+            } catch (const std::future_error& e) {
+              if (e.code() != std::future_errc::no_state) throw;
+            }
+          },
+          [&](const std::jthread&) { reaper_state.emplace<std::monostate>(); }),
+      reaper_state);
+}
+
+void KMSCache::clear_cache() const {
+  cache->clear();
+}
+
+int KMSCache::do_cache(
+    const DoutPrefixProvider* dpp, const std::string& key_prefix_kms,
+    const std::string& key_id, const FetchFn& fetch, std::string& actual_key,
+    optional_yield y) {
+  static std::string_view key_prefix("rgw_sse_");
+  const std::string cache_key =
+      string_cat_reserve(key_prefix, key_prefix_kms, "_", key_id);
+  std::shared_ptr<KMSCache::CacheValue> value =
+      cache->lookup_or(key_id, std::make_shared<KMSCache::CacheValue>());
+  auto result = call_once(
+      *value, y,
+      [&dpp, &fetch, &cache_key, &value, &key_prefix_kms, &key_id,
+       this]() -> KMSCache::CacheResult {
+        std::string secret;
+        const int ret = fetch(secret);
+        ldpp_dout(dpp, 20) << "KMS Cache: " << cache_key
+                           << " call_once fetched with ret " << ret << dendl;
+
+        if (ret == -ENOENT) {  // key does not exists, treat as permanent error
+          ldpp_dout(dpp, 15)
+              << "KMS Cache: " << cache_key
+              << " key does not exists. treating as permanent error " << dendl;
+          cache->update_ttl_if(
+              cache_key, value,
+              std::chrono::seconds(
+                  dpp->get_cct()->_conf->rgw_crypt_s3_kms_cache_negative_ttl));
+          perfcounter->inc(l_rgw_kms_error_permanent);
+          return tl::unexpected(ret);
+        } else if (ret < 0) {  // treat other errors as transient
+          ldpp_dout(dpp, 15)
+              << "KMS Cache: " << cache_key << " fetch error (" << ret << ") "
+              << std::strerror(ret) << " treating as transient error " << dendl;
+          cache->update_ttl_if(
+              cache_key, value,
+              std::chrono::seconds(
+                  dpp->get_cct()
+                      ->_conf->rgw_crypt_s3_kms_cache_transient_error_ttl));
+          perfcounter->inc(l_rgw_kms_error_transient);
+          return tl::unexpected(ret);
+        }
+
+        // This function might be in flight for the same key_id more than
+        // once. The keyring key must, however, be unique to not refer
+        // (and remove) the same key twice.
+        uuid_d uuid;
+        uuid.generate_random();
+        const std::string keyring_key = string_cat_reserve(
+            key_prefix, key_prefix_kms, "_", key_id, "_v", uuid.to_string());
+        auto keyring_secret = keyring->add(keyring_key, secret);
+        ceph::crypto::zeroize_for_security(secret.data(), secret.length());
+        if (!keyring_secret) {
+          ldpp_dout(dpp, 5)
+              << "KMS Cache: " << cache_key << " keyring add error ("
+              << keyring_secret.error()
+              << "). removing from cache. disabling cache." << dendl;
+          cache->remove_if(cache_key, value);
+          disable_cache();
+          perfcounter->inc(l_rgw_kms_error_secret_store);
+          return tl::unexpected(-ERR_INTERNAL_ERROR);
+        }
+        return std::move(keyring_secret.value());
+      });
+
+  ldpp_dout_fmt(
+      dpp, 20, "KMS Cache: {} -> {}/{}", cache_key,
+      result && result.has_value() ? fmt::format("{}", *result.value()) : "-",
+      !result ? result.error() : 0);
+
+  if (result) {
+    if (auto ret = result.value()->read(actual_key); ret.value() != 0) {
+      ldpp_dout(dpp, 5) << "KMS Cache: " << cache_key << " keyring "
+                        << *result.value() << " read error (" << ret
+                        << "). removing from cache. disabling cache." << dendl;
+      cache->remove_if(cache_key, value);
+      disable_cache();
+      perfcounter->inc(l_rgw_kms_error_secret_store);
+      return -ERR_INTERNAL_ERROR;
+    }
+    return 0;
+  } else {
+    return result.error();
+  }
+}
+
+}  // namespace rgw::kms
diff --git a/src/rgw/rgw_kms_cache.h b/src/rgw/rgw_kms_cache.h
new file mode 100644 (file)
index 0000000..9fce8c8
--- /dev/null
@@ -0,0 +1,111 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2025 Clyso GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <sys/stat.h>
+
+#include <boost/asio/executor.hpp>
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/strand.hpp>
+#include <chrono>
+#include <future>
+#include <thread>
+#include <variant>
+
+#include "common/async/call_once.h"
+#include "common/async/yield_context.h"
+#include "common/dout.h"
+#include "common/keyring.h"
+#include "common/web_cache.h"
+#include "include/expected.hpp"
+
+namespace rgw::kms {
+
+// Cache RGW KMS Secrets
+//
+// Manages a WebCache instance plus TTL reaper. Adds cache stampede
+// mitigation when fetching new values using (async) call once.
+class KMSCache {
+  CephContext* cct;
+
+ public:
+  using SharedSecret = std::shared_ptr<KeyringSecret>;
+  using CacheResult = tl::expected<SharedSecret, int>;
+  using CacheValue = ceph::async::once_result<CacheResult>;
+  using KMSSecretCache = webcache::WebCache<std::string, CacheValue>;
+  using FetchFn = std::function<int(std::string&)>;
+
+ protected:
+  std::unique_ptr<KMSSecretCache> cache;
+  std::unique_ptr<Keyring> keyring;
+
+  // The TTL Reaper is either a service thread we own or async running
+  // on an executor elsewhere (where we keep a strand and cancellation
+  // signal).
+  struct AsyncState {
+    boost::asio::strand<boost::asio::io_context::executor_type> strand;
+    boost::asio::cancellation_signal cancel_signal;
+    std::future<void> done;
+    explicit AsyncState(boost::asio::io_context::executor_type ex)
+        : strand(boost::asio::make_strand(ex)) {}
+  };
+  std::variant<std::monostate, std::jthread, AsyncState> reaper_state;
+
+ public:
+  KMSCache() = delete;
+  KMSCache(const KMSCache&) = delete;
+  KMSCache(KMSCache&&) = delete;
+  KMSCache& operator=(const KMSCache&) = delete;
+  KMSCache& operator=(KMSCache&&) = delete;
+  KMSCache(CephContext* _cct, std::unique_ptr<Keyring> _keyring);
+  virtual ~KMSCache();
+
+  // Initialize a TTL reaper. Either threaded or async depending on
+  // the executor parameter
+  void initialize_ttl_reaper(
+      std::optional<boost::asio::io_context::executor_type> executor);
+
+  // Stop any type of TTL reaper running
+  void stop_ttl_reaper();
+
+  [[nodiscard]] bool reaper_initialized() const noexcept {
+    return !std::holds_alternative<std::monostate>(reaper_state);
+  }
+
+  static std::jthread make_ttl_reaper_thread(
+      CephContext* cct, KMSSecretCache& cache, std::chrono::seconds ttl);
+
+  static std::future<void> make_ttl_reaper_async(CephContext* cct,
+      KMSSecretCache& cache, std::chrono::seconds ttl,
+      const boost::asio::strand<boost::asio::io_context::executor_type>& strand,
+      boost::asio::cancellation_signal& cancel_signal);
+
+  void clear_cache() const;
+
+  // Retrieve a cached KMS secret or fetch, cache, and return a KMS
+  // secret.
+  //
+  // The secret is returned via actual_key and it is up to the caller
+  // to properly clear the secret from memory. The cache may be shared
+  // between SSE-KMS and SSE-S3, where key_kms_prefix is available to
+  // partition the key namespace.
+  int do_cache(
+      const DoutPrefixProvider* dpp, const std::string& key_kms_prefix,
+      const std::string& key_id, const FetchFn& fetch, std::string& actual_key,
+      optional_yield y);
+
+  void disable_cache() { cct->_conf->rgw_crypt_s3_kms_cache_enabled = false; }
+};
+}  // namespace rgw::kms
index 5c5c14879345677b801f43bcea302ccff431ee5f..ebdff90c33325a7d83db58925b4dc504d60b4d5a 100644 (file)
@@ -165,6 +165,7 @@ int main(int argc, char *argv[])
   main.init_opslog();
   main.init_tracepoints();
   main.init_lua();
+  main.init_kms_cache();
 #ifdef WITH_RADOSGW_RADOS
   main.init_dedup();
 #endif
index 95eb92509b1664813de74868e137a7eb3ac51fa2..66f4c932af7f9722870d9c78b715ac9e7b92217e 100644 (file)
@@ -138,6 +138,7 @@ public:
   int init_frontends2(RGWLib* rgwlib = nullptr);
   void init_tracepoints();
   void init_lua();
+  void init_kms_cache();
 #ifdef WITH_RADOSGW_RADOS
   void init_dedup();
 #endif
index 6019e449977bb64533b4a93bf0d8257cb26def88..ae9c5776f3b02197336493a7a11055e34f82a202 100644 (file)
@@ -63,6 +63,11 @@ void add_rgw_frontend_counters(PerfCountersBuilder *pcb) {
   pcb->add_u64_counter(l_rgw_d4n_cache_hits, "d4n_cache_hits", "D4N cache hits");
   pcb->add_u64_counter(l_rgw_d4n_cache_misses, "d4n_cache_misses", "D4N cache misses");
   pcb->add_u64_counter(l_rgw_d4n_cache_evictions, "d4n_cache_evictions", "D4N cache evictions");
+
+  pcb->add_time_avg(l_rgw_kms_fetch_lat, "kms_fetch_lat", "Uncached KMS secret fetch latency");
+  pcb->add_u64_counter(l_rgw_kms_error_permanent, "kms_error_permanent", "Permanent (e.g key not found) errors returned from KMS");
+  pcb->add_u64_counter(l_rgw_kms_error_transient, "kms_error_transient", "Trainsient (e.g timeout, overloaded) errors returned from KMS");
+  pcb->add_u64_counter(l_rgw_kms_error_secret_store, "kms_error_secret_store", "Secrt store errors (e.g kernel keyring quota)");
 }
 
 void add_rgw_op_counters(PerfCountersBuilder *lpcb) {
index e93b4fe36f91c30939328afb92855ce22f395034..85e515804c8f68f50bd5d01a91839b164f53c176 100644 (file)
@@ -53,6 +53,10 @@ enum {
   l_rgw_d4n_cache_misses,
   l_rgw_d4n_cache_evictions,
 
+  l_rgw_kms_fetch_lat,
+  l_rgw_kms_error_transient,
+  l_rgw_kms_error_permanent,
+  l_rgw_kms_error_secret_store,
   l_rgw_last,
 };
 
index 9fbaf8d38a26e264952351684415d26635449e79..c40e6f2d67a6009141b58d804ce9aba65bc58858 100644 (file)
@@ -6,6 +6,7 @@
 #include <memory>
 
 #include "rgw_auth_registry.h"
+#include "rgw_kms_cache.h"
 
 class ActiveRateLimiter;
 class OpsLogSink;
@@ -50,6 +51,7 @@ struct RGWProcessEnv {
   std::unique_ptr<OpsLogSink> olog;
   std::unique_ptr<rgw::auth::StrategyRegistry> auth_registry;
   ActiveRateLimiter* ratelimiting = nullptr;
+  std::unique_ptr<rgw::kms::KMSCache> kms_cache;
 
 #ifdef WITH_ARROW_FLIGHT
   // managed by rgw:flight::FlightFrontend in rgw_flight_frontend.cc
index c587b5834dcbc539b3cc81bca7045a8e76978a8d..acc7c7f963485ae52ab6c9d94df5e2793dbcc0b1 100644 (file)
@@ -335,6 +335,13 @@ target_include_directories(unittest_rgw_kms
   SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw")
 target_link_libraries(unittest_rgw_kms ${rgw_libs})
 
+# unittest_rgw_kms_cache
+add_executable(unittest_rgw_kms_cache test_rgw_kms_cache.cc)
+add_ceph_unittest(unittest_rgw_kms_cache)
+target_include_directories(unittest_rgw_kms_cache
+  SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw")
+target_link_libraries(unittest_rgw_kms_cache ${rgw_libs})
+
 # unittest_rgw_url
 add_executable(unittest_rgw_url test_rgw_url.cc)
 add_ceph_unittest(unittest_rgw_url)
diff --git a/src/test/rgw/test_rgw_kms_cache.cc b/src/test/rgw/test_rgw_kms_cache.cc
new file mode 100644 (file)
index 0000000..15004a0
--- /dev/null
@@ -0,0 +1,368 @@
+#include <common/async/yield_context.h>
+#include <common/ceph_argparse.h>
+#include <common/common_init.h>
+#include <common/keyring.h>
+#include <common/perf_counters.h>
+#include <fmt/format.h>
+#include <fmt/ostream.h>
+#include <global/global_context.h>
+#include <global/global_init.h>
+#include <gtest/gtest.h>
+
+#include <boost/asio/cancellation_signal.hpp>
+#include <boost/asio/co_spawn.hpp>
+#include <boost/asio/executor_work_guard.hpp>
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/spawn.hpp>
+#include <boost/asio/use_future.hpp>
+#include <cerrno>
+#include <chrono>
+#include <variant>
+#include <thread>
+
+#include "common/web_cache.h"
+#include "rgw_common.h"
+#include "rgw_kms.h"
+#include "rgw_kms_cache.h"
+#include "rgw_perf_counters.h"
+
+class FakeKeyringSecret : public KeyringSecret {
+ public:
+  std::string key;
+  std::string val;
+
+  FakeKeyringSecret(std::string _key, std::string _val)
+      : key(std::move(_key)), val(std::move(_val)) {};
+
+  std::error_code read(std::string& out) const override {
+    out = val;
+    return {};
+  }
+  std::error_code remove() const override { return {}; }
+  bool initialized() const override { return true; }
+};
+
+class FakeKeyring : public Keyring {
+ public:
+  tl::expected<std::unique_ptr<KeyringSecret>, std::error_code> add(
+      const std::string& key, const std::string& val) noexcept override {
+    return std::make_unique<FakeKeyringSecret>(key, val);
+  }
+  bool supported(std::error_code* ec) noexcept override { return true; }
+  std::string_view name() const noexcept override { return "fake"; };
+};
+
+class TestKMSCacheReaperLifecycle : public ::testing::Test,
+                                    public rgw::kms::KMSCache {
+ public:
+  TestKMSCacheReaperLifecycle()
+      : rgw::kms::KMSCache(g_ceph_context, std::make_unique<FakeKeyring>()) {};
+};
+
+static void rethrow(const std::exception_ptr& eptr) {
+  if (eptr) {
+    std::rethrow_exception(eptr);
+  }
+}
+
+TEST_F(TestKMSCacheReaperLifecycle, Threaded) {
+  initialize_ttl_reaper(std::nullopt);
+  EXPECT_TRUE(reaper_initialized());
+  EXPECT_TRUE(std::holds_alternative<std::jthread>(reaper_state));
+
+  stop_ttl_reaper();
+  EXPECT_FALSE(reaper_initialized());
+  EXPECT_FALSE(std::holds_alternative<std::jthread>(reaper_state));
+}
+
+TEST_F(TestKMSCacheReaperLifecycle, NoInit) {
+  EXPECT_FALSE(reaper_initialized());
+  EXPECT_TRUE(std::holds_alternative<std::monostate>(reaper_state));
+  this->stop_ttl_reaper();
+  EXPECT_FALSE(reaper_initialized());
+  EXPECT_TRUE(std::holds_alternative<std::monostate>(reaper_state));
+}
+
+TEST_F(TestKMSCacheReaperLifecycle, Async) {
+  boost::asio::io_context io;
+  auto work = boost::asio::make_work_guard(io);
+  std::jthread io_thread([&]() {
+    io.run();
+  });
+  auto drain = [&] {
+    boost::asio::post(io, boost::asio::use_future).get();
+  };
+  this->initialize_ttl_reaper(io.get_executor());
+  drain();
+  EXPECT_TRUE(reaper_initialized());
+  EXPECT_TRUE(
+      std::holds_alternative<AsyncState>(reaper_state));
+  stop_ttl_reaper();
+  drain();
+  work.reset();
+  io.restart();
+  EXPECT_EQ(io.run(), 0);
+}
+
+class TestKMSCache : public ::testing::Test {
+ protected:
+  CephContext* cct = g_ceph_context;
+  const NoDoutPrefix no_dpp{cct, ceph_subsys_rgw};
+
+ public:
+  std::unique_ptr<rgw::kms::KMSCache> uut =
+      std::make_unique<rgw::kms::KMSCache>(
+          cct, std::make_unique<FakeKeyring>());
+
+  void TearDown() override { perfcounter->reset(); }
+};
+
+TEST_F(TestKMSCache, TransientFetchError) {
+  std::string tmp;
+  ASSERT_EQ(
+      uut->do_cache(
+          &no_dpp, "testing", "foo", [](std::string&) { return -EINVAL; }, tmp,
+          null_yield),
+      -EINVAL);
+  ASSERT_EQ(perfcounter->get(l_rgw_kms_error_transient), 1);
+  ASSERT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 0);
+  ASSERT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0);
+  ASSERT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled);
+}
+
+TEST_F(TestKMSCache, PermanentFetchError) {
+  std::string tmp;
+  ASSERT_EQ(
+      uut->do_cache(
+          &no_dpp, "testing", "foo", [](std::string&) { return -ENOENT; }, tmp,
+          null_yield),
+      -ENOENT);
+  ASSERT_EQ(perfcounter->get(l_rgw_kms_error_transient), 0);
+  ASSERT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 1);
+  ASSERT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0);
+  ASSERT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled);
+}
+
+TEST_F(TestKMSCache, Cache) {
+  const std::string test_key = "37u481923789123u72189ou3jsdf978of";
+  std::string_view test_val = "dlksafj3029jfmsjf8322ty7nghb67435";
+  std::string cache_return;
+  ASSERT_EQ(
+      uut->do_cache(
+          &no_dpp, "testing", test_key,
+          [&](std::string& val) {
+            val = test_val;
+            return 0;
+          },
+          cache_return, null_yield),
+      0);
+  EXPECT_EQ(perfcounter->get(l_rgw_kms_error_transient), 0);
+  EXPECT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 0);
+  EXPECT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0);
+  EXPECT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled);
+  ASSERT_EQ(cache_return, test_val);
+
+  cache_return.clear();
+  ASSERT_EQ(
+      uut->do_cache(
+          &no_dpp, "testing", test_key,
+          [&](std::string&) -> int {
+            EXPECT_TRUE(false) << "fetch must not be called";
+            return -2342;
+          },
+          cache_return, null_yield),
+      0);
+  EXPECT_EQ(perfcounter->get(l_rgw_kms_error_transient), 0);
+  EXPECT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 0);
+  EXPECT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0);
+  EXPECT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled);
+  ASSERT_EQ(cache_return, test_val);
+}
+
+class TestSSEKMSWithTestingKMS : public ::testing::Test {
+ protected:
+  CephContext* cct = g_ceph_context;
+  const NoDoutPrefix no_dpp{cct, ceph_subsys_rgw};
+  std::map<std::string, bufferlist> attrs = {
+      {RGW_ATTR_CRYPT_KEYID,
+       []() {
+         bufferlist bl;
+         bl.append("foo");
+         return bl;
+       }()},
+      {RGW_ATTR_CRYPT_KEYSEL, []() {
+         // AES_ECB(32*"#").decrypt(32*"*")
+         bufferlist bl;
+         bl.append(
+             "\xc6\xb1/\x12\xdc\xf7"
+             "e"
+             "\xe3;\xea\x14\xa4x\x1f"
+             "bX"
+             "\xc6\xb1/\x12\xdc\xf7"
+             "e"
+             "\xe3;\xea\x14\xa4x\x1f"
+             "bX");
+         return bl;
+       }()}};
+  rgw::kms::KMSCache* uut;
+  PerfCounters* cache_perf = nullptr;
+
+  void SetUp() override {
+    // Simulate RGW app KMSCache livecyle. A single instance started
+    // during app init and may be disabled via config.
+    uut = &cct->lookup_or_create_singleton_object<rgw::kms::KMSCache>(
+        "TestSSEKMSWithTestingKMS::kms-cache", false, cct,
+        std::make_unique<FakeKeyring>());
+    cct->get_perfcounters_collection()->with_counters(
+        [&](const PerfCountersCollectionImpl::CounterMap& by_path) {
+          for (const auto& i : by_path) {
+            const auto& perf_counters = i.second.perf_counters;
+            if (perf_counters->get_name() == "kms-cache") {
+              cache_perf = perf_counters;
+              return;
+            }
+          }
+        });
+
+    ASSERT_NE(perfcounter, nullptr);
+    ASSERT_NE(cache_perf, nullptr);
+  }
+
+  void TearDown() override {
+    JSONFormatter f(true);
+    cache_perf->dump_formatted(&f, false, select_labeled_t::labeled);
+    f.flush(std::cout);
+    cct->get_perfcounters_collection()->with_counters(
+        [&](const PerfCountersCollectionImpl::CounterMap& by_path) {
+          for (const auto& i : by_path) {
+            const auto& perf_counters = i.second.perf_counters;
+            if (perf_counters->get_name() == "rgw") {
+              auto [sum, count] =
+                  perf_counters->get_tavg_ns(l_rgw_kms_fetch_lat);
+              fmt::println(
+                  std::cout,
+                  "RGW KMS perf counters: err_trans={} err_perm={} err_sec={} "
+                  "avg_fetch_lat={} fetch_cnt={}",
+                  perf_counters->get(l_rgw_kms_error_transient),
+                  perf_counters->get(l_rgw_kms_error_permanent),
+                  perf_counters->get(l_rgw_kms_error_secret_store),
+                  std::chrono::nanoseconds(sum / count), count);
+              return;
+            }
+          }
+        });
+    uut->clear_cache();
+  }
+
+  void test_do_reconstitue() {
+    std::string actual_key;
+    const int ret = reconstitute_actual_key_from_kms(
+        &no_dpp, attrs, uut, null_yield, actual_key);
+    ASSERT_EQ(ret, 0);
+    ASSERT_EQ(actual_key, "********************************");
+    ASSERT_EQ(perfcounter->get(l_rgw_kms_error_secret_store), 0);
+    ASSERT_EQ(perfcounter->get(l_rgw_kms_error_permanent), 0);
+    ASSERT_EQ(perfcounter->get(l_rgw_kms_error_transient), 0);
+  }
+};
+
+TEST_F(
+    TestSSEKMSWithTestingKMS, TestReconstituteActualKeyFromKMSBasicsDefault) {
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 0);
+  ASSERT_TRUE(cct->_conf->rgw_crypt_s3_kms_cache_enabled);
+  test_do_reconstitue();
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::hit)), 0);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::miss)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::size)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::clear)), 0);
+  EXPECT_EQ(
+      cache_perf->get(static_cast<int>(webcache::Metric::capacity)),
+      cct->_conf->rgw_crypt_s3_kms_cache_max_size);
+}
+
+TEST_F(
+    TestSSEKMSWithTestingKMS,
+    TestReconstituteActualKeyFromKMSBasicsWithoutCache) {
+  cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "false");
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 1);
+  test_do_reconstitue();
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::hit)), 0);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::miss)), 0);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::size)), 0);
+  ASSERT_FALSE(cct->_conf->rgw_crypt_s3_kms_cache_enabled);
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 2);
+}
+
+TEST_F(
+    TestSSEKMSWithTestingKMS, TestReconstituteActualKeyFromKMSBasicsWithCache) {
+  ASSERT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 2);
+
+  cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "true");
+  test_do_reconstitue();
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 3);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::hit)), 0);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::miss)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::size)), 1);
+
+  test_do_reconstitue();
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 3);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::hit)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::miss)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::size)), 1);
+
+  test_do_reconstitue();
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 3);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::hit)), 2);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::miss)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::size)), 1);
+}
+
+TEST_F(TestSSEKMSWithTestingKMS, TestRuntimeEnableDisable) {
+  ASSERT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 3);
+
+  cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "true");
+  test_do_reconstitue();
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 4);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::hit)), 0);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::miss)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::size)), 1);
+
+  cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "false");
+  test_do_reconstitue();
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 5);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::hit)), 0);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::miss)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::size)), 1);
+
+  cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "true");
+  test_do_reconstitue();
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 5);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::hit)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::miss)), 1);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::size)), 1);
+
+  cct->_conf.set_val("rgw_crypt_s3_kms_cache_enabled", "false");
+  uut->clear_cache();
+  test_do_reconstitue();
+  EXPECT_EQ(perfcounter->get_tavg_ns(l_rgw_kms_fetch_lat).second, 6);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::hit)), 0);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::miss)), 0);
+  EXPECT_EQ(cache_perf->get(static_cast<int>(webcache::Metric::size)), 0);
+}
+
+int main(int argc, char** argv) {
+  auto args = argv_to_vec(argc, argv);
+  std::map<std::string, std::string> defaults{
+      {"rgw_crypt_s3_kms_backend", RGW_SSE_KMS_BACKEND_TESTING},
+      {"rgw_crypt_s3_kms_encryption_keys",
+       "foo=IyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyM="},
+      {"debug_rgw", "20"}};
+  auto cct = global_init(
+      &defaults, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY,
+      CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+  common_init_finish(g_ceph_context);
+  rgw_perf_start(g_ceph_context);
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}