]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: add rgw::sal::SQLiteConfigStore
authorCasey Bodley <cbodley@redhat.com>
Tue, 30 Aug 2022 21:58:09 +0000 (17:58 -0400)
committerCasey Bodley <cbodley@redhat.com>
Wed, 28 Sep 2022 21:48:00 +0000 (17:48 -0400)
Signed-off-by: Casey Bodley <cbodley@redhat.com>
14 files changed:
src/common/subsys.h
src/rgw/store/dbstore/CMakeLists.txt
src/rgw/store/dbstore/common/connection_pool.h [new file with mode: 0644]
src/rgw/store/dbstore/config/sqlite.cc [new file with mode: 0644]
src/rgw/store/dbstore/config/sqlite.h [new file with mode: 0644]
src/rgw/store/dbstore/config/sqlite_schema.h [new file with mode: 0644]
src/rgw/store/dbstore/config/store.cc [new file with mode: 0644]
src/rgw/store/dbstore/config/store.h [new file with mode: 0644]
src/rgw/store/dbstore/sqlite/connection.cc [new file with mode: 0644]
src/rgw/store/dbstore/sqlite/connection.h [new file with mode: 0644]
src/rgw/store/dbstore/sqlite/error.cc [new file with mode: 0644]
src/rgw/store/dbstore/sqlite/error.h [new file with mode: 0644]
src/rgw/store/dbstore/sqlite/statement.cc [new file with mode: 0644]
src/rgw/store/dbstore/sqlite/statement.h [new file with mode: 0644]

index 9b3de521ecdd89fbde1f8f450fe8bba27b0c21b9..4ca500e7bb5d2fc7d94a64bb608e9ace56e0d345 100644 (file)
@@ -62,6 +62,7 @@ SUBSYS(rgw, 1, 5)                 // log level for the Rados gateway
 SUBSYS(rgw_sync, 1, 5)
 SUBSYS(rgw_datacache, 1, 5)
 SUBSYS(rgw_access, 1, 5)
+SUBSYS(rgw_dbstore, 1, 5)
 SUBSYS(javaclient, 1, 5)
 SUBSYS(asok, 1, 5)
 SUBSYS(throttle, 1, 1)
index 5d52b9c984c4567588819c3a73c080112b95db56..af5868872bf411163280c05e9b1028088fc2b85c 100644 (file)
@@ -9,7 +9,15 @@ set (CMAKE_INCLUDE_DIR ${CMAKE_INCLUDE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/common"
 set(dbstore_srcs
     common/dbstore_log.h
     common/dbstore.h
-    common/dbstore.cc)
+    common/dbstore.cc
+    config/store.cc)
+IF(USE_SQLITE)
+  list(APPEND dbstore_srcs
+      config/sqlite.cc
+      sqlite/connection.cc
+      sqlite/error.cc
+      sqlite/statement.cc)
+endif()
 
 set(dbstore_mgr_srcs
     dbstore_mgr.h
@@ -19,6 +27,7 @@ set(dbstore_mgr_srcs
 add_library(dbstore_lib ${dbstore_srcs})
 target_include_directories(dbstore_lib PUBLIC "${CMAKE_SOURCE_DIR}/src/fmt/include")
 target_include_directories(dbstore_lib PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw")
+target_include_directories(dbstore_lib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 set(link_targets spawn)
 if(WITH_JAEGER)
   list(APPEND link_targets jaeger_base)
diff --git a/src/rgw/store/dbstore/common/connection_pool.h b/src/rgw/store/dbstore/common/connection_pool.h
new file mode 100644 (file)
index 0000000..07f3c81
--- /dev/null
@@ -0,0 +1,147 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <concepts>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <boost/circular_buffer.hpp>
+#include "common/dout.h"
+
+namespace rgw::dbstore {
+
+template <typename Connection>
+class ConnectionHandle;
+
+/// A thread-safe base class that manages a fixed-size pool of generic database
+/// connections and supports the reclamation of ConnectionHandles. This class
+/// is the subset of ConnectionPool which doesn't depend on the Factory type.
+template <typename Connection>
+class ConnectionPoolBase {
+ public:
+  /// \param max_connections capacity of the buffer that holds idle connections
+  ConnectionPoolBase(std::size_t max_connections)
+      : connections(max_connections)
+  {}
+ private:
+  friend class ConnectionHandle<Connection>;
+
+  /// Hand a borrowed connection back to the pool and wake one waiter, if any.
+  /// Only reachable through the ConnectionHandle friend declaration.
+  // TODO: the caller may detect a connection error that prevents the connection
+  // from being reused. allow them to indicate these errors here
+  void put(std::unique_ptr<Connection> connection)
+  {
+    auto lock = std::scoped_lock{mutex};
+    connections.push_back(std::move(connection));
+
+    // notify on every put. signaling only on the empty -> non-empty transition
+    // can strand a waiter: if two connections are returned before the first
+    // woken waiter reacquires the mutex, the second put() would observe
+    // size() == 2 and skip the notification, leaving the second waiter asleep
+    // even though a connection is available
+    cond.notify_one();
+  }
+ protected:
+  std::mutex mutex; ///< guards 'connections' (and derived-class bookkeeping)
+  std::condition_variable cond; ///< signaled by put()
+  /// idle connections, in the order they were returned
+  boost::circular_buffer<std::unique_ptr<Connection>> connections;
+};
+
+/// Handle to a database connection borrowed from the pool. Automatically
+/// returns the connection to its pool on the handle's destruction. Handles are
+/// move-only; a moved-from or default-constructed handle holds no connection.
+template <typename Connection>
+class ConnectionHandle {
+  ConnectionPoolBase<Connection>* pool = nullptr;
+  std::unique_ptr<Connection> conn;
+ public:
+  /// Construct an empty handle that owns no connection.
+  ConnectionHandle() noexcept = default;
+  /// Take ownership of 'conn', which is returned to 'pool' on destruction.
+  ConnectionHandle(ConnectionPoolBase<Connection>* pool,
+                   std::unique_ptr<Connection> conn) noexcept
+    : pool(pool), conn(std::move(conn)) {}
+
+  ~ConnectionHandle() {
+    if (conn) {
+      pool->put(std::move(conn));
+    }
+  }
+
+  ConnectionHandle(ConnectionHandle&&) = default;
+  /// Release any connection currently held back to its pool, then take over
+  /// the connection (if any) owned by 'o'.
+  ConnectionHandle& operator=(ConnectionHandle&& o) noexcept {
+    if (this == &o) {
+      // self-move: without this guard we would return our own connection to
+      // the pool and end up empty
+      return *this;
+    }
+    if (conn) {
+      pool->put(std::move(conn));
+    }
+    conn = std::move(o.conn);
+    pool = o.pool;
+    return *this;
+  }
+
+  /// True if the handle currently owns a connection.
+  explicit operator bool() const noexcept { return static_cast<bool>(conn); }
+  Connection& operator*() const noexcept { return *conn; }
+  Connection* operator->() const noexcept { return conn.get(); }
+  Connection* get() const noexcept { return conn.get(); }
+};
+
+
+// factory_of concept: F must be move-constructible and callable as
+//   F(const DoutPrefixProvider*) -> std::unique_ptr<T>
+template <typename F, typename T>
+concept factory_of = std::move_constructible<F> &&
+    requires (F make, const DoutPrefixProvider* dpp) {
+      { make(dpp) } -> std::same_as<std::unique_ptr<T>>;
+    };
+
+
+/// Generic database connection pool that enforces a limit on open connections.
+/// New connections are opened lazily through the supplied Factory.
+template <typename Connection, factory_of<Connection> Factory>
+class ConnectionPool : public ConnectionPoolBase<Connection> {
+ public:
+  /// \param factory callable used to open a new connection on demand
+  /// \param max_connections upper bound on connections opened by this pool
+  ConnectionPool(Factory factory, std::size_t max_connections)
+      : ConnectionPoolBase<Connection>(max_connections),
+        factory(std::move(factory))
+  {}
+
+  /// Borrow a connection from the pool. An idle connection is reused when one
+  /// is available; otherwise the factory opens a new one while the pool is
+  /// still below its limit. Once the limit is reached, the caller blocks on
+  /// the condition variable until a connection is returned to the pool.
+  auto get(const DoutPrefixProvider* dpp)
+      -> ConnectionHandle<Connection>
+  {
+    auto lock = std::unique_lock{this->mutex};
+    auto& idle = this->connections;
+
+    if (idle.empty() && total < idle.capacity()) {
+      // nothing idle, but still below the limit: open another connection
+      std::unique_ptr<Connection> fresh = factory(dpp);
+      ++total;
+      return {this, std::move(fresh)};
+    }
+
+    if (idle.empty()) {
+      // at the limit: block until the next put()
+      // TODO: support optional_yield
+      ldpp_dout(dpp, 4) << "ConnectionPool waiting on a connection" << dendl;
+      this->cond.wait(lock, [&idle] { return !idle.empty(); });
+      ldpp_dout(dpp, 4) << "ConnectionPool done waiting" << dendl;
+    }
+
+    // reuse the connection at the front of the idle queue
+    std::unique_ptr<Connection> reused = std::move(idle.front());
+    idle.pop_front();
+    return {this, std::move(reused)};
+  }
+ private:
+  Factory factory;
+  std::size_t total = 0; // number of connections opened through the factory
+};
+
+} // namespace rgw::dbstore
diff --git a/src/rgw/store/dbstore/config/sqlite.cc b/src/rgw/store/dbstore/config/sqlite.cc
new file mode 100644 (file)
index 0000000..051dc34
--- /dev/null
@@ -0,0 +1,2072 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <charconv>
+#include <initializer_list>
+#include <map>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include <sqlite3.h>
+
+#include "include/buffer.h"
+#include "include/encoding.h"
+#include "common/dout.h"
+#include "common/random_string.h"
+#include "rgw_zone.h"
+
+#include "common/connection_pool.h"
+#include "sqlite/connection.h"
+#include "sqlite/error.h"
+#include "sqlite/statement.h"
+#include "sqlite_schema.h"
+#include "sqlite.h"
+
+#define dout_subsys ceph_subsys_rgw_dbstore
+
+namespace rgw::dbstore::config {
+
+// DoutPrefixPipe that writes a fixed text via add_prefix() and reports the
+// rgw_dbstore subsystem. The text is held as a string_view, so the caller's
+// string must outlive this object.
+struct Prefix : DoutPrefixPipe {
+  std::string_view prefix;
+
+  Prefix(const DoutPrefixProvider& parent, std::string_view text)
+      : DoutPrefixPipe(parent), prefix(text) {}
+
+  unsigned get_subsys() const override { return dout_subsys; }
+
+  void add_prefix(std::ostream& out) const override { out << prefix; }
+};
+
+namespace {
+
+// named placeholders substituted into the schema format strings and used as
+// the parameter names when binding values to prepared statements
+constexpr const char* P1 = ":1";
+constexpr const char* P2 = ":2";
+constexpr const char* P3 = ":3";
+constexpr const char* P4 = ":4";
+constexpr const char* P5 = ":5";
+constexpr const char* P6 = ":6";
+
+
+// Read up to entries.size() text rows from 'stmt' into 'entries' and fill in
+// 'result': result.entries refers to the rows that were read, and result.next
+// is the marker for the following page (empty once the listing is exhausted).
+void read_text_rows(const DoutPrefixProvider* dpp,
+                    const sqlite::stmt_execution& stmt,
+                    std::span<std::string> entries,
+                    sal::ListResult<std::string>& result)
+{
+  result.entries = sqlite::read_text_rows(dpp, stmt, entries);
+  // a short (or empty) page means end of listing. the empty() check also
+  // covers a zero-length 'entries' span, where size() < size() is false and
+  // back() would otherwise be called on an empty range
+  if (result.entries.empty() || result.entries.size() < entries.size()) {
+    result.next.clear();
+  } else {
+    result.next = result.entries.back();
+  }
+}
+
+// One row of realm data as filled in by read_realm_row(): the realm itself
+// plus the version number and tag columns.
+struct RealmRow {
+  RGWRealm info;
+  int ver = 0; // initialized so a default-constructed row is never indeterminate
+  std::string tag;
+};
+
+// Copy the columns of the current result row into 'row'. Column order:
+// 0=id, 1=name, 2=current period, 3=epoch, 4=version number, 5=version tag.
+void read_realm_row(const sqlite::stmt_execution& stmt, RealmRow& row)
+{
+  RGWRealm& realm = row.info;
+  realm.id = sqlite::column_text(stmt, 0);
+  realm.name = sqlite::column_text(stmt, 1);
+  realm.current_period = sqlite::column_text(stmt, 2);
+  realm.epoch = sqlite::column_int(stmt, 3);
+
+  row.ver = sqlite::column_int(stmt, 4);
+  row.tag = sqlite::column_text(stmt, 5);
+}
+
+// Decode an RGWPeriod from the current result row. Only column 3 (the encoded
+// period) is read; every field is recovered by decoding it. decode() is
+// called on the raw bytes, so a decode failure propagates to the caller.
+void read_period_row(const sqlite::stmt_execution& stmt, RGWPeriod& row)
+{
+  std::string encoded = sqlite::column_text(stmt, 3);
+
+  bufferlist raw = bufferlist::static_from_string(encoded);
+  auto pos = raw.cbegin();
+  decode(row, pos);
+}
+
+// One row of zonegroup data as filled in by read_zonegroup_row(): the decoded
+// zonegroup plus the version number and tag columns.
+struct ZoneGroupRow {
+  RGWZoneGroup info;
+  int ver = 0; // initialized so a default-constructed row is never indeterminate
+  std::string tag;
+};
+
+// Fill 'row' from the current result row: column 3 holds the encoded
+// zonegroup, column 4 the version number and column 5 the version tag.
+void read_zonegroup_row(const sqlite::stmt_execution& stmt, ZoneGroupRow& row)
+{
+  std::string encoded = sqlite::column_text(stmt, 3);
+  row.ver = sqlite::column_int(stmt, 4);
+  row.tag = sqlite::column_text(stmt, 5);
+
+  // decode last, after all columns have been read from the statement
+  bufferlist raw = bufferlist::static_from_string(encoded);
+  auto pos = raw.cbegin();
+  decode(row.info, pos);
+}
+
+// One row of zone data as filled in by read_zone_row(): the decoded zone
+// params plus the version number and tag columns.
+struct ZoneRow {
+  RGWZoneParams info;
+  int ver = 0; // initialized so a default-constructed row is never indeterminate
+  std::string tag;
+};
+
+// Fill 'row' from the current result row: column 3 holds the encoded zone
+// params, column 4 the version number and column 5 the version tag.
+void read_zone_row(const sqlite::stmt_execution& stmt, ZoneRow& row)
+{
+  std::string encoded = sqlite::column_text(stmt, 3);
+  row.ver = sqlite::column_int(stmt, 4);
+  row.tag = sqlite::column_text(stmt, 5);
+
+  // decode last, after all columns have been read from the statement
+  bufferlist raw = bufferlist::static_from_string(encoded);
+  auto pos = raw.cbegin();
+  decode(row.info, pos);
+}
+
+// Produce a random 24-character alphanumeric tag for a new object version.
+std::string generate_version_tag(CephContext* cct)
+{
+  constexpr int tag_length = 24;
+  return gen_rand_alphanumeric(cct, tag_length);
+}
+
+// sqlite specializations of the generic connection pool types
+using SQLiteConnectionHandle =
+    ConnectionHandle<sqlite::Connection>;
+
+using SQLiteConnectionPool =
+    ConnectionPool<sqlite::Connection, sqlite::ConnectionFactory>;
+
+} // anonymous namespace
+
+// Named class wrapping the sqlite connection pool; it inherits the pool's
+// constructors and adds no members of its own. SQLiteConfigStore holds it via
+// std::unique_ptr<SQLiteImpl>.
+class SQLiteImpl : public SQLiteConnectionPool {
+ public:
+  using SQLiteConnectionPool::SQLiteConnectionPool;
+};
+
+
+// Take ownership of the connection pool used by every store operation.
+SQLiteConfigStore::SQLiteConfigStore(std::unique_ptr<SQLiteImpl> pool)
+  : impl(std::move(pool))
+{}
+
+// Defined out of line, after SQLiteImpl has been fully defined above.
+SQLiteConfigStore::~SQLiteConfigStore() = default;
+
+
+// Realm
+
+// RealmWriter bound to one realm row at a specific version number/tag. Each
+// statement is conditioned on that version; when no row matches, the writer
+// returns -ECANCELED and disables itself by clearing 'impl'. A successful
+// remove() disables the writer as well.
+class SQLiteRealmWriter : public sal::RealmWriter {
+  SQLiteImpl* impl; // cleared after a version conflict or delete
+  int ver;
+  std::string tag;
+  std::string realm_id;
+  std::string realm_name;
+ public:
+  SQLiteRealmWriter(SQLiteImpl* impl, int ver, std::string tag,
+                    std::string_view realm_id, std::string_view realm_name)
+    : impl(impl), ver(ver), tag(std::move(tag)),
+      realm_id(realm_id), realm_name(realm_name)
+  {}
+
+  // Update the realm's current period and epoch.
+  // Returns 0 on success, -EINVAL if the writer is disabled, if info's id or
+  // name differ from the ones this writer was created with, or on a foreign
+  // key violation, -ECANCELED on a version mismatch, -EBUSY if the database
+  // is busy, and -EIO on any other sqlite error.
+  int write(const DoutPrefixProvider* dpp, optional_yield y,
+            const RGWRealm& info) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:realm_write "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after a conflict or delete
+    }
+    if (realm_id != info.id || realm_name != info.name) {
+      return -EINVAL; // can't modify realm id or name directly
+    }
+
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["realm_upd"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::realm_update5,
+                                            P1, P2, P3, P4, P5);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, info.id);
+      sqlite::bind_text(dpp, binding, P2, info.current_period);
+      sqlite::bind_int(dpp, binding, P3, info.epoch);
+      sqlite::bind_int(dpp, binding, P4, ver);
+      sqlite::bind_text(dpp, binding, P5, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        // our version is no longer consistent, so later writes would fail too
+        impl = nullptr;
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "realm update failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::foreign_key_constraint) {
+        return -EINVAL; // refers to nonexistent CurrentPeriod
+      } else if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    ++ver; // track the version this writer expects on its next statement
+    return 0;
+  }
+
+  // Rename the realm to 'new_name' and, on success, store the new name in
+  // info.name.
+  // Returns 0 on success, -EINVAL if the writer is disabled, if info's id or
+  // name differ from this writer's, or if new_name is empty, -EEXIST if the
+  // name is already taken, -ECANCELED on a version mismatch, -EBUSY if the
+  // database is busy, and -EIO on any other sqlite error.
+  int rename(const DoutPrefixProvider* dpp, optional_yield y,
+             RGWRealm& info, std::string_view new_name) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:realm_rename "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    if (realm_id != info.id || realm_name != info.name) {
+      return -EINVAL; // can't modify realm id or name directly
+    }
+    if (new_name.empty()) {
+      ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl;
+      return -EINVAL;
+    }
+
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["realm_rename"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::realm_rename4,
+                                            P1, P2, P3, P4);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, realm_id);
+      sqlite::bind_text(dpp, binding, P2, new_name);
+      sqlite::bind_int(dpp, binding, P3, ver);
+      sqlite::bind_text(dpp, binding, P4, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        impl = nullptr;
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "realm rename failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::unique_constraint) {
+        return -EEXIST; // Name already taken
+      } else if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    info.name = std::string{new_name};
+    // keep the cached name in sync with info.name. otherwise the name check
+    // in any later write()/rename() through this writer would compare against
+    // the old name and fail with -EINVAL
+    realm_name = info.name;
+    ++ver;
+    return 0;
+  }
+
+  // Delete the realm row. The writer is disabled once the delete statement
+  // has executed, whether or not a row matched.
+  // Returns 0 on success, -EINVAL if the writer is already disabled,
+  // -ECANCELED on a version mismatch, -EBUSY if the database is busy, and
+  // -EIO on any other sqlite error.
+  int remove(const DoutPrefixProvider* dpp, optional_yield y) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:realm_remove "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["realm_del"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::realm_delete3, P1, P2, P3);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, realm_id);
+      sqlite::bind_int(dpp, binding, P2, ver);
+      sqlite::bind_text(dpp, binding, P3, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      impl = nullptr; // prevent any further writes after delete
+      if (!::sqlite3_changes(conn->db.get())) {
+        return -ECANCELED; // VersionNumber/Tag mismatch
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "realm delete failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    return 0;
+  }
+}; // SQLiteRealmWriter
+
+
+// Record 'realm_id' as the default realm. With 'exclusive' set a plain insert
+// is used and an existing default yields -EEXIST; otherwise an upsert is used.
+// Returns 0 on success, -EINVAL for an empty id, -EBUSY if the database is
+// busy, and -EIO on any other sqlite error. 'y' is not used here.
+int SQLiteConfigStore::write_default_realm_id(const DoutPrefixProvider* dpp,
+                                              optional_yield y, bool exclusive,
+                                              std::string_view realm_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:write_default_realm_id "};
+  dpp = &prefix;
+
+  if (realm_id.empty()) {
+    ldpp_dout(dpp, 0) << "requires a realm id" << dendl;
+    return -EINVAL;
+  }
+
+  try {
+    auto conn = impl->get(dpp);
+    // pick the cached statement slot that matches the requested mode
+    sqlite::stmt_ptr& stmt = exclusive ? conn->statements["def_realm_ins"]
+                                       : conn->statements["def_realm_ups"];
+    if (!stmt) { // prepare on first use for this connection
+      const std::string sql = exclusive
+          ? fmt::format(schema::default_realm_insert1, P1)
+          : fmt::format(schema::default_realm_upsert1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto bound = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, bound, P1, realm_id);
+
+    auto exec = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval0(dpp, exec);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "default realm insert failed: " << e.what() << dendl;
+    const auto& code = e.code();
+    if (code == sqlite::errc::primary_key_constraint) {
+      return -EEXIST;
+    }
+    if (code == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+// Read the id of the default realm into 'realm_id'.
+// Returns 0 on success, -ENOENT if no default realm is set, -EBUSY if the
+// database is busy, and -EIO on any other sqlite error. 'y' is not used here.
+int SQLiteConfigStore::read_default_realm_id(const DoutPrefixProvider* dpp,
+                                             optional_yield y,
+                                             std::string& realm_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_default_realm_id "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["def_realm_sel"];
+    if (!stmt) {
+      static constexpr std::string_view sql = schema::default_realm_select0;
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    realm_id = sqlite::column_text(reset, 0);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "default realm select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      // eval1() found no row. report it as -ENOENT like the other
+      // single-row reads in this file instead of a generic -EIO
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+// Clear the default realm. Returns 0 on success, -ENOENT if the delete
+// changed no rows, -EBUSY if the database is busy, and -EIO on any other
+// sqlite error. 'y' is not used here.
+int SQLiteConfigStore::delete_default_realm_id(const DoutPrefixProvider* dpp,
+                                               optional_yield y)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:delete_default_realm_id "};
+  dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["def_realm_del"];
+    if (!stmt) { // prepare on first use for this connection
+      static constexpr std::string_view sql = schema::default_realm_delete0;
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto exec = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval0(dpp, exec);
+
+    const int changed = ::sqlite3_changes(conn->db.get());
+    if (changed == 0) {
+      return -ENOENT;
+    }
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "default realm delete failed: " << e.what() << dendl;
+    return e.code() == sqlite::errc::busy ? -EBUSY : -EIO;
+  }
+  return 0;
+}
+
+
+// Store a new realm at version 1 with a freshly generated version tag. With
+// 'exclusive' set a plain insert is used; otherwise an upsert. On success,
+// and if 'writer' is non-null, *writer receives a writer bound to that
+// version.
+// Returns 0 on success, -EINVAL for an empty id or name, -EEXIST if the id or
+// name is already taken, -EBUSY if the database is busy, and -EIO on any
+// other sqlite error. 'y' is not used here.
+int SQLiteConfigStore::create_realm(const DoutPrefixProvider* dpp,
+                                    optional_yield y, bool exclusive,
+                                    const RGWRealm& info,
+                                    std::unique_ptr<sal::RealmWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:create_realm "};
+  dpp = &prefix;
+
+  if (info.id.empty()) {
+    ldpp_dout(dpp, 0) << "realm cannot have an empty id" << dendl;
+    return -EINVAL;
+  }
+  if (info.name.empty()) {
+    ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl;
+    return -EINVAL;
+  }
+
+  const int ver = 1;
+  auto tag = generate_version_tag(dpp->get_cct());
+
+  try {
+    auto conn = impl->get(dpp);
+    // pick the cached statement slot that matches the requested mode
+    sqlite::stmt_ptr& stmt = exclusive ? conn->statements["realm_ins"]
+                                       : conn->statements["realm_ups"];
+    if (!stmt) { // prepare on first use for this connection
+      const std::string sql = exclusive
+          ? fmt::format(schema::realm_insert4, P1, P2, P3, P4)
+          : fmt::format(schema::realm_upsert4, P1, P2, P3, P4);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto bound = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, bound, P1, info.id);
+    sqlite::bind_text(dpp, bound, P2, info.name);
+    sqlite::bind_int(dpp, bound, P3, ver);
+    sqlite::bind_text(dpp, bound, P4, tag);
+
+    auto exec = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval0(dpp, exec);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "realm insert failed: " << e.what() << dendl;
+    const auto& code = e.code();
+    if (code == sqlite::errc::primary_key_constraint) {
+      return -EEXIST; // ID already taken
+    }
+    if (code == sqlite::errc::unique_constraint) {
+      return -EEXIST; // Name already taken
+    }
+    if (code == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  if (!writer) {
+    return 0; // caller doesn't want a writer
+  }
+  *writer = std::make_unique<SQLiteRealmWriter>(
+      impl.get(), ver, std::move(tag), info.id, info.name);
+  return 0;
+}
+
+// Load the realm with the given id into 'info'. If 'writer' is non-null,
+// *writer receives a writer bound to the version that was read.
+// Returns 0 on success, -EINVAL for an empty id, -ENOENT if no row matched,
+// -EBUSY if the database is busy, and -EIO on any other sqlite or decode
+// error. 'y' is not used here.
+int SQLiteConfigStore::read_realm_by_id(const DoutPrefixProvider* dpp,
+                                        optional_yield y,
+                                        std::string_view realm_id,
+                                        RGWRealm& info,
+                                        std::unique_ptr<sal::RealmWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_realm_by_id "};
+  dpp = &prefix;
+
+  if (realm_id.empty()) {
+    ldpp_dout(dpp, 0) << "requires a realm id" << dendl;
+    return -EINVAL;
+  }
+
+  RealmRow found;
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["realm_sel_id"];
+    if (!stmt) { // prepare on first use for this connection
+      const std::string sql = fmt::format(schema::realm_select_id1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto bound = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, bound, P1, realm_id);
+
+    auto exec = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, exec);
+
+    read_realm_row(exec, found);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl;
+    const auto& code = e.code();
+    if (code == sqlite::errc::done) {
+      return -ENOENT;
+    }
+    if (code == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  info = std::move(found.info);
+  if (!writer) {
+    return 0; // caller doesn't want a writer
+  }
+  *writer = std::make_unique<SQLiteRealmWriter>(
+      impl.get(), found.ver, std::move(found.tag), info.id, info.name);
+  return 0;
+}
+
+// Look up a realm by name on 'conn' and read the matching row into 'row'.
+// Failures are not translated here; exceptions from the sqlite helpers
+// propagate to the caller.
+static void realm_select_by_name(const DoutPrefixProvider* dpp,
+                                 sqlite::Connection& conn,
+                                 std::string_view realm_name,
+                                 RealmRow& row)
+{
+  auto& cached = conn.statements["realm_sel_name"];
+  if (!cached) { // prepare on first use for this connection
+    const std::string sql = fmt::format(schema::realm_select_name1, P1);
+    cached = sqlite::prepare_statement(dpp, conn.db.get(), sql);
+  }
+
+  auto bound = sqlite::stmt_binding{cached.get()};
+  sqlite::bind_text(dpp, bound, P1, realm_name);
+
+  auto exec = sqlite::stmt_execution{cached.get()};
+  sqlite::eval1(dpp, exec);
+  read_realm_row(exec, row);
+}
+
+// Load the realm with the given name into 'info'. If 'writer' is non-null,
+// *writer receives a writer bound to the version that was read.
+// Returns 0 on success, -EINVAL for an empty name, -ENOENT if no row matched,
+// -EBUSY if the database is busy, and -EIO on any other sqlite or decode
+// error. 'y' is not used here.
+int SQLiteConfigStore::read_realm_by_name(const DoutPrefixProvider* dpp,
+                                          optional_yield y,
+                                          std::string_view realm_name,
+                                          RGWRealm& info,
+                                          std::unique_ptr<sal::RealmWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_realm_by_name "};
+  dpp = &prefix;
+
+  if (realm_name.empty()) {
+    ldpp_dout(dpp, 0) << "requires a realm name" << dendl;
+    return -EINVAL;
+  }
+
+  RealmRow found;
+  try {
+    auto conn = impl->get(dpp);
+    realm_select_by_name(dpp, *conn, realm_name, found);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl;
+    const auto& code = e.code();
+    if (code == sqlite::errc::done) {
+      return -ENOENT;
+    }
+    if (code == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  info = std::move(found.info);
+  if (!writer) {
+    return 0; // caller doesn't want a writer
+  }
+  *writer = std::make_unique<SQLiteRealmWriter>(
+      impl.get(), found.ver, std::move(found.tag), info.id, info.name);
+  return 0;
+}
+
+// Load the default realm into 'info'. If 'writer' is non-null, *writer
+// receives a writer bound to the version that was read.
+// Returns 0 on success, -ENOENT if no row matched, -EBUSY if the database is
+// busy, and -EIO on any other sqlite or decode error. 'y' is not used here.
+int SQLiteConfigStore::read_default_realm(const DoutPrefixProvider* dpp,
+                                          optional_yield y,
+                                          RGWRealm& info,
+                                          std::unique_ptr<sal::RealmWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_default_realm "};
+  dpp = &prefix;
+
+  RealmRow found;
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["realm_sel_def"];
+    if (!stmt) { // prepare on first use for this connection
+      static constexpr std::string_view sql = schema::realm_select_default0;
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto exec = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, exec);
+
+    read_realm_row(exec, found);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl;
+    const auto& code = e.code();
+    if (code == sqlite::errc::done) {
+      return -ENOENT;
+    }
+    if (code == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  info = std::move(found.info);
+  if (!writer) {
+    return 0; // caller doesn't want a writer
+  }
+  *writer = std::make_unique<SQLiteRealmWriter>(
+      impl.get(), found.ver, std::move(found.tag), info.id, info.name);
+  return 0;
+}
+
+// Resolve a realm name to its id, stored in 'realm_id' on success.
+// Returns 0 on success, -EINVAL for an empty name, -ENOENT if no row matched,
+// -EBUSY if the database is busy, and -EIO on any other sqlite or decode
+// error. 'y' is not used here.
+int SQLiteConfigStore::read_realm_id(const DoutPrefixProvider* dpp,
+                                     optional_yield y,
+                                     std::string_view realm_name,
+                                     std::string& realm_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_realm_id "};
+  dpp = &prefix;
+
+  if (realm_name.empty()) {
+    ldpp_dout(dpp, 0) << "requires a realm name" << dendl;
+    return -EINVAL;
+  }
+
+  try {
+    auto conn = impl->get(dpp);
+
+    // read the whole row, but only the id is handed back
+    RealmRow found;
+    realm_select_by_name(dpp, *conn, realm_name, found);
+    realm_id = std::move(found.info.id);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl;
+    const auto& code = e.code();
+    if (code == sqlite::errc::done) {
+      return -ENOENT;
+    }
+    if (code == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  return 0;
+}
+
+// New-period notifications are not implemented by this store: the arguments
+// are ignored and the call always returns -ENOTSUP.
+int SQLiteConfigStore::realm_notify_new_period(const DoutPrefixProvider* dpp,
+                                               optional_yield y,
+                                               const RGWPeriod& period)
+{
+  return -ENOTSUP;
+}
+
+// List realm names, one page at a time. 'marker' and entries.size() are bound
+// as the two statement parameters; the rows read are stored in 'entries' and
+// described by 'result'.
+// Returns 0 on success, -EBUSY if the database is busy, and -EIO on any other
+// sqlite error. 'y' is not used here.
+int SQLiteConfigStore::list_realm_names(const DoutPrefixProvider* dpp,
+                                        optional_yield y, const std::string& marker,
+                                        std::span<std::string> entries,
+                                        sal::ListResult<std::string>& result)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:list_realm_names "};
+  dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["realm_sel_names"];
+    if (!stmt) { // prepare on first use for this connection
+      const std::string sql = fmt::format(schema::realm_select_names2, P1, P2);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto bound = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, bound, P1, marker);
+    sqlite::bind_int(dpp, bound, P2, entries.size());
+
+    auto exec = sqlite::stmt_execution{stmt.get()};
+    read_text_rows(dpp, exec, entries, result);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl;
+    return e.code() == sqlite::errc::busy ? -EBUSY : -EIO;
+  }
+  return 0;
+}
+
+
+// Period
+
+// Store the encoded period along with its id, epoch and realm id. With
+// 'exclusive' set a plain insert is used; otherwise an upsert.
+// Returns 0 on success, -EINVAL for an empty id or a foreign key violation,
+// -EEXIST on a primary key conflict, -EBUSY if the database is busy, and -EIO
+// on any other sqlite error. 'y' is not used here.
+int SQLiteConfigStore::create_period(const DoutPrefixProvider* dpp,
+                                     optional_yield y, bool exclusive,
+                                     const RGWPeriod& info)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:create_period "}; dpp = &prefix;
+
+  if (info.id.empty()) {
+    ldpp_dout(dpp, 0) << "period cannot have an empty id" << dendl;
+    return -EINVAL;
+  }
+
+  bufferlist bl;
+  encode(info, bl);
+  // view over bl's bytes; bl must stay alive until the statement has run
+  const auto data = std::string_view{bl.c_str(), bl.length()};
+
+  try {
+    auto conn = impl->get(dpp);
+    sqlite::stmt_ptr* stmt = nullptr;
+    if (exclusive) {
+      stmt = &conn->statements["period_ins"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::period_insert4,
+                                            P1, P2, P3, P4);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    } else {
+      stmt = &conn->statements["period_ups"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::period_upsert4,
+                                            P1, P2, P3, P4);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    }
+    auto binding = sqlite::stmt_binding{stmt->get()};
+    sqlite::bind_text(dpp, binding, P1, info.id);
+    sqlite::bind_int(dpp, binding, P2, info.epoch);
+    sqlite::bind_text(dpp, binding, P3, info.realm_id);
+    sqlite::bind_text(dpp, binding, P4, data);
+
+    auto reset = sqlite::stmt_execution{stmt->get()};
+    sqlite::eval0(dpp, reset);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "period insert failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::foreign_key_constraint) {
+      return -EINVAL; // refers to nonexistent RealmID
+    } else if (e.code() == sqlite::errc::primary_key_constraint) {
+      // report a conflicting exclusive insert as -EEXIST, matching
+      // create_realm() and write_default_realm_id(), instead of -EIO.
+      // NOTE(review): assumes the Periods table declares a primary key;
+      // the schema is not visible here - confirm
+      return -EEXIST;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+static void period_select_epoch(const DoutPrefixProvider* dpp,
+                                sqlite::Connection& conn,
+                                std::string_view id, uint32_t epoch,
+                                RGWPeriod& row)
+{
+  auto& stmt = conn.statements["period_sel_epoch"];
+  if (!stmt) {
+    const std::string sql = fmt::format(schema::period_select_epoch2, P1, P2);
+    stmt = sqlite::prepare_statement(dpp, conn.db.get(), sql);
+  }
+  auto binding = sqlite::stmt_binding{stmt.get()};
+  sqlite::bind_text(dpp, binding, P1, id);
+  sqlite::bind_int(dpp, binding, P2, epoch);
+
+  auto reset = sqlite::stmt_execution{stmt.get()};
+  sqlite::eval1(dpp, reset);
+
+  read_period_row(reset, row);
+}
+
+static void period_select_latest(const DoutPrefixProvider* dpp,
+                                 sqlite::Connection& conn,
+                                 std::string_view id, RGWPeriod& row)
+{
+  auto& stmt = conn.statements["period_sel_latest"];
+  if (!stmt) {
+    const std::string sql = fmt::format(schema::period_select_latest1, P1);
+    stmt = sqlite::prepare_statement(dpp, conn.db.get(), sql);
+  }
+  auto binding = sqlite::stmt_binding{stmt.get()};
+  sqlite::bind_text(dpp, binding, P1, id);
+
+  auto reset = sqlite::stmt_execution{stmt.get()};
+  sqlite::eval1(dpp, reset);
+
+  read_period_row(reset, row);
+}
+
+int SQLiteConfigStore::read_period(const DoutPrefixProvider* dpp,
+                                   optional_yield y,
+                                   std::string_view period_id,
+                                   std::optional<uint32_t> epoch,
+                                   RGWPeriod& info)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_period "}; dpp = &prefix;
+
+  if (period_id.empty()) {
+    ldpp_dout(dpp, 0) << "requires a period id" << dendl;
+    return -EINVAL;
+  }
+
+  try {
+    auto conn = impl->get(dpp);
+    if (epoch) {
+      period_select_epoch(dpp, *conn, period_id, *epoch, info);
+    } else {
+      period_select_latest(dpp, *conn, period_id, info);
+    }
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "period decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "period select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::delete_period(const DoutPrefixProvider* dpp,
+                                     optional_yield y,
+                                     std::string_view period_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:delete_period "}; dpp = &prefix;
+
+  if (period_id.empty()) {
+    ldpp_dout(dpp, 0) << "requires a period id" << dendl;
+    return -EINVAL;
+  }
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["period_del"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::period_delete1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, period_id);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval0(dpp, reset);
+
+    if (!::sqlite3_changes(conn->db.get())) {
+      return -ENOENT;
+    }
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "period delete failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::list_period_ids(const DoutPrefixProvider* dpp,
+                                       optional_yield y,
+                                       const std::string& marker,
+                                       std::span<std::string> entries,
+                                       sal::ListResult<std::string>& result)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:list_period_ids "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["period_sel_ids"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::period_select_ids2, P1, P2);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, marker);
+    sqlite::bind_int(dpp, binding, P2, entries.size());
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    read_text_rows(dpp, reset, entries, result);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "period select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+
+// ZoneGroup
+
+class SQLiteZoneGroupWriter : public sal::ZoneGroupWriter {
+  SQLiteImpl* impl;
+  int ver;
+  std::string tag;
+  std::string zonegroup_id;
+  std::string zonegroup_name;
+ public:
+  SQLiteZoneGroupWriter(SQLiteImpl* impl, int ver, std::string tag,
+                        std::string_view zonegroup_id,
+                        std::string_view zonegroup_name)
+    : impl(impl), ver(ver), tag(std::move(tag)),
+      zonegroup_id(zonegroup_id), zonegroup_name(zonegroup_name)
+  {}
+
+  int write(const DoutPrefixProvider* dpp, optional_yield y,
+            const RGWZoneGroup& info) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:zonegroup_write "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    if (zonegroup_id != info.id || zonegroup_name != info.name) {
+      return -EINVAL; // can't modify zonegroup id or name directly
+    }
+
+    bufferlist bl;
+    encode(info, bl);
+    const auto data = std::string_view{bl.c_str(), bl.length()};
+
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["zonegroup_upd"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::zonegroup_update5,
+                                            P1, P2, P3, P4, P5);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, info.id);
+      sqlite::bind_text(dpp, binding, P2, info.realm_id);
+      sqlite::bind_text(dpp, binding, P3, data);
+      sqlite::bind_int(dpp, binding, P4, ver);
+      sqlite::bind_text(dpp, binding, P5, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        impl = nullptr;
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "zonegroup update failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::foreign_key_constraint) {
+        return -EINVAL; // refers to nonexistent RealmID
+      } else if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    return 0;
+  }
+
+  int rename(const DoutPrefixProvider* dpp, optional_yield y,
+             RGWZoneGroup& info, std::string_view new_name) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:zonegroup_rename "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    if (zonegroup_id != info.get_id() || zonegroup_name != info.get_name()) {
+      return -EINVAL; // can't modify zonegroup id or name directly
+    }
+    if (new_name.empty()) {
+      ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl;
+      return -EINVAL;
+    }
+
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["zonegroup_rename"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::zonegroup_rename4,
+                                            P1, P2, P3, P4);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, info.id);
+      sqlite::bind_text(dpp, binding, P2, new_name);
+      sqlite::bind_int(dpp, binding, P3, ver);
+      sqlite::bind_text(dpp, binding, P4, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        impl = nullptr;
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "zonegroup rename failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::unique_constraint) {
+        return -EEXIST; // Name already taken
+      } else if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    info.name = std::string{new_name};
+    return 0;
+  }
+
+  int remove(const DoutPrefixProvider* dpp, optional_yield y) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:zonegroup_remove "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["zonegroup_del"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::zonegroup_delete3,
+                                            P1, P2, P3);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, zonegroup_id);
+      sqlite::bind_int(dpp, binding, P2, ver);
+      sqlite::bind_text(dpp, binding, P3, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      impl = nullptr;
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "zonegroup delete failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    return 0;
+  }
+}; // SQLiteZoneGroupWriter
+
+
+int SQLiteConfigStore::write_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                                  optional_yield y, bool exclusive,
+                                                  std::string_view realm_id,
+                                                  std::string_view zonegroup_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:write_default_zonegroup_id "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    sqlite::stmt_ptr* stmt = nullptr;
+    if (exclusive) {
+      stmt = &conn->statements["def_zonegroup_ins"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::default_zonegroup_insert2,
+                                            P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    } else {
+      stmt = &conn->statements["def_zonegroup_ups"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::default_zonegroup_upsert2,
+                                            P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    }
+    auto binding = sqlite::stmt_binding{stmt->get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+    sqlite::bind_text(dpp, binding, P2, zonegroup_id);
+
+    auto reset = sqlite::stmt_execution{stmt->get()};
+    sqlite::eval0(dpp, reset);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "default zonegroup insert failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::read_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                                 optional_yield y,
+                                                 std::string_view realm_id,
+                                                 std::string& zonegroup_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zonegroup_id "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["def_zonegroup_sel"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::default_zonegroup_select1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    zonegroup_id = sqlite::column_text(reset, 0);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "default zonegroup select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::delete_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                                   optional_yield y,
+                                                   std::string_view realm_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:delete_default_zonegroup_id "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["def_zonegroup_del"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::default_zonegroup_delete1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval0(dpp, reset);
+
+    if (!::sqlite3_changes(conn->db.get())) {
+      return -ENOENT;
+    }
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "default zonegroup delete failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+
+int SQLiteConfigStore::create_zonegroup(const DoutPrefixProvider* dpp,
+                                        optional_yield y, bool exclusive,
+                                        const RGWZoneGroup& info,
+                                        std::unique_ptr<sal::ZoneGroupWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:create_zonegroup "}; dpp = &prefix;
+
+  if (info.id.empty()) {
+    ldpp_dout(dpp, 0) << "zonegroup cannot have an empty id" << dendl;
+    return -EINVAL;
+  }
+  if (info.name.empty()) {
+    ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl;
+    return -EINVAL;
+  }
+
+  int ver = 1;
+  auto tag = generate_version_tag(dpp->get_cct());
+
+  bufferlist bl;
+  encode(info, bl);
+  const auto data = std::string_view{bl.c_str(), bl.length()};
+
+  try {
+    auto conn = impl->get(dpp);
+    sqlite::stmt_ptr* stmt = nullptr;
+    if (exclusive) {
+      stmt = &conn->statements["zonegroup_ins"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::zonegroup_insert6,
+                                            P1, P2, P3, P4, P5, P6);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    } else {
+      stmt = &conn->statements["zonegroup_ups"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::zonegroup_upsert6,
+                                            P1, P2, P3, P4, P5, P6);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    }
+    auto binding = sqlite::stmt_binding{stmt->get()};
+    sqlite::bind_text(dpp, binding, P1, info.id);
+    sqlite::bind_text(dpp, binding, P2, info.name);
+    sqlite::bind_text(dpp, binding, P3, info.realm_id);
+    sqlite::bind_text(dpp, binding, P4, data);
+    sqlite::bind_int(dpp, binding, P5, ver);
+    sqlite::bind_text(dpp, binding, P6, tag);
+
+    auto reset = sqlite::stmt_execution{stmt->get()};
+    sqlite::eval0(dpp, reset);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zonegroup insert failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::foreign_key_constraint) {
+      return -EINVAL; // refers to nonexistent RealmID
+    } else if (e.code() == sqlite::errc::primary_key_constraint) {
+      return -EEXIST; // ID already taken
+    } else if (e.code() == sqlite::errc::unique_constraint) {
+      return -EEXIST; // Name already taken
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<SQLiteZoneGroupWriter>(
+        impl.get(), ver, std::move(tag), info.id, info.name);
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::read_zonegroup_by_id(const DoutPrefixProvider* dpp,
+                                            optional_yield y,
+                                            std::string_view zonegroup_id,
+                                            RGWZoneGroup& info,
+                                            std::unique_ptr<sal::ZoneGroupWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_zonegroup_by_id "}; dpp = &prefix;
+
+  if (zonegroup_id.empty()) {
+    ldpp_dout(dpp, 0) << "requires a zonegroup id" << dendl;
+    return -EINVAL;
+  }
+
+  ZoneGroupRow row;
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["zonegroup_sel_id"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::zonegroup_select_id1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, zonegroup_id);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    read_zonegroup_row(reset, row);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "zonegroup decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  info = std::move(row.info);
+  if (writer) {
+    *writer = std::make_unique<SQLiteZoneGroupWriter>(
+        impl.get(), row.ver, std::move(row.tag), info.id, info.name);
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::read_zonegroup_by_name(const DoutPrefixProvider* dpp,
+                                              optional_yield y,
+                                              std::string_view zonegroup_name,
+                                              RGWZoneGroup& info,
+                                              std::unique_ptr<sal::ZoneGroupWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_zonegroup_by_name "}; dpp = &prefix;
+
+  if (zonegroup_name.empty()) {
+    ldpp_dout(dpp, 0) << "requires a zonegroup name" << dendl;
+    return -EINVAL;
+  }
+
+  ZoneGroupRow row;
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["zonegroup_sel_name"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::zonegroup_select_name1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, zonegroup_name);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    read_zonegroup_row(reset, row);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "zonegroup decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  info = std::move(row.info);
+  if (writer) {
+    *writer = std::make_unique<SQLiteZoneGroupWriter>(
+        impl.get(), row.ver, std::move(row.tag), info.id, info.name);
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::read_default_zonegroup(const DoutPrefixProvider* dpp,
+                                              optional_yield y,
+                                              std::string_view realm_id,
+                                              RGWZoneGroup& info,
+                                              std::unique_ptr<sal::ZoneGroupWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zonegroup "}; dpp = &prefix;
+
+  ZoneGroupRow row;
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["zonegroup_sel_def"];
+    if (!stmt) {
+      static constexpr std::string_view sql = schema::zonegroup_select_default0;
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    read_zonegroup_row(reset, row);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "zonegroup decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  info = std::move(row.info);
+  if (writer) {
+    *writer = std::make_unique<SQLiteZoneGroupWriter>(
+        impl.get(), row.ver, std::move(row.tag), info.id, info.name);
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::list_zonegroup_names(const DoutPrefixProvider* dpp,
+                                            optional_yield y,
+                                            const std::string& marker,
+                                            std::span<std::string> entries,
+                                            sal::ListResult<std::string>& result)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:list_zonegroup_names "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["zonegroup_sel_names"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::zonegroup_select_names2, P1, P2);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    auto reset = sqlite::stmt_execution{stmt.get()};
+
+    sqlite::bind_text(dpp, binding, P1, marker);
+    sqlite::bind_int(dpp, binding, P2, entries.size());
+
+    read_text_rows(dpp, reset, entries, result);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+
+// Zone
+
+class SQLiteZoneWriter : public sal::ZoneWriter {
+  SQLiteImpl* impl;
+  int ver;
+  std::string tag;
+  std::string zone_id;
+  std::string zone_name;
+ public:
+  SQLiteZoneWriter(SQLiteImpl* impl, int ver, std::string tag,
+                   std::string_view zone_id, std::string_view zone_name)
+    : impl(impl), ver(ver), tag(std::move(tag)),
+      zone_id(zone_id), zone_name(zone_name)
+  {}
+
+  int write(const DoutPrefixProvider* dpp, optional_yield y,
+            const RGWZoneParams& info) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:zone_write "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    if (zone_id != info.id || zone_name != info.name) {
+      return -EINVAL; // can't modify zone id or name directly
+    }
+
+    bufferlist bl;
+    encode(info, bl);
+    const auto data = std::string_view{bl.c_str(), bl.length()};
+
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["zone_upd"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::zone_update5,
+                                            P1, P2, P3, P4, P5);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, info.id);
+      sqlite::bind_text(dpp, binding, P2, info.realm_id);
+      sqlite::bind_text(dpp, binding, P3, data);
+      sqlite::bind_int(dpp, binding, P4, ver);
+      sqlite::bind_text(dpp, binding, P5, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        impl = nullptr;
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "zone update failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::foreign_key_constraint) {
+        return -EINVAL; // refers to nonexistent RealmID
+      } else if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    ++ver;
+    return 0;
+  }
+
+  int rename(const DoutPrefixProvider* dpp, optional_yield y,
+             RGWZoneParams& info, std::string_view new_name) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:zone_rename "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    if (zone_id != info.id || zone_name != info.name) {
+      return -EINVAL; // can't modify zone id or name directly
+    }
+    if (new_name.empty()) {
+      ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl;
+      return -EINVAL;
+    }
+
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["zone_rename"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::zone_rename4, P1, P2, P2, P3);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, info.id);
+      sqlite::bind_text(dpp, binding, P2, new_name);
+      sqlite::bind_int(dpp, binding, P3, ver);
+      sqlite::bind_text(dpp, binding, P4, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        impl = nullptr;
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "zone rename failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::unique_constraint) {
+        return -EEXIST; // Name already taken
+      } else if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    info.name = std::string{new_name};
+    ++ver;
+    return 0;
+  }
+
+  int remove(const DoutPrefixProvider* dpp, optional_yield y) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:zone_remove "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["zone_del"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::zone_delete3, P1, P2, P3);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, zone_id);
+      sqlite::bind_int(dpp, binding, P2, ver);
+      sqlite::bind_text(dpp, binding, P3, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      impl = nullptr;
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "zone delete failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    return 0;
+  }
+}; // SQLiteZoneWriter
+
+
+int SQLiteConfigStore::write_default_zone_id(const DoutPrefixProvider* dpp,
+                                             optional_yield y, bool exclusive,
+                                             std::string_view realm_id,
+                                             std::string_view zone_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:write_default_zone_id "}; dpp = &prefix;
+
+  if (zone_id.empty()) {
+    ldpp_dout(dpp, 0) << "requires a zone id" << dendl;
+    return -EINVAL;
+  }
+
+  try {
+    auto conn = impl->get(dpp);
+    sqlite::stmt_ptr* stmt = nullptr;
+    if (exclusive) {
+      stmt = &conn->statements["def_zone_ins"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::default_zone_insert2, P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    } else {
+      stmt = &conn->statements["def_zone_ups"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::default_zone_upsert2, P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    }
+    auto binding = sqlite::stmt_binding{stmt->get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+    sqlite::bind_text(dpp, binding, P2, zone_id);
+
+    auto reset = sqlite::stmt_execution{stmt->get()};
+    sqlite::eval0(dpp, reset);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "default zone insert failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::read_default_zone_id(const DoutPrefixProvider* dpp,
+                                            optional_yield y,
+                                            std::string_view realm_id,
+                                            std::string& zone_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zone_id "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["def_zone_sel"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::default_zone_select1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    zone_id = sqlite::column_text(reset, 0);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "default zone select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::delete_default_zone_id(const DoutPrefixProvider* dpp,
+                                              optional_yield y,
+                                              std::string_view realm_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:delete_default_zone_id "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["def_zone_del"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::default_zone_delete1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval0(dpp, reset);
+
+    if (!::sqlite3_changes(conn->db.get())) {
+      return -ENOENT;
+    }
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "default zone delete failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+
+int SQLiteConfigStore::create_zone(const DoutPrefixProvider* dpp,
+                                   optional_yield y, bool exclusive,
+                                   const RGWZoneParams& info,
+                                   std::unique_ptr<sal::ZoneWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:create_zone "}; dpp = &prefix;
+
+  if (info.id.empty()) {
+    ldpp_dout(dpp, 0) << "zone cannot have an empty id" << dendl;
+    return -EINVAL;
+  }
+  if (info.name.empty()) {
+    ldpp_dout(dpp, 0) << "zone cannot have an empty name" << dendl;
+    return -EINVAL;
+  }
+
+  int ver = 1;
+  auto tag = generate_version_tag(dpp->get_cct());
+
+  bufferlist bl;
+  encode(info, bl);
+  const auto data = std::string_view{bl.c_str(), bl.length()};
+
+  try {
+    auto conn = impl->get(dpp);
+    sqlite::stmt_ptr* stmt = nullptr;
+    if (exclusive) {
+      stmt = &conn->statements["zone_ins"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::zone_insert6,
+                                            P1, P2, P3, P4, P5, P6);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    } else {
+      stmt = &conn->statements["zone_ups"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::zone_upsert6,
+                                            P1, P2, P3, P4, P5, P6);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    }
+    auto binding = sqlite::stmt_binding{stmt->get()};
+    sqlite::bind_text(dpp, binding, P1, info.id);
+    sqlite::bind_text(dpp, binding, P2, info.name);
+    sqlite::bind_text(dpp, binding, P3, info.realm_id);
+    sqlite::bind_text(dpp, binding, P4, data);
+    sqlite::bind_int(dpp, binding, P5, ver);
+    sqlite::bind_text(dpp, binding, P6, tag);
+
+    auto reset = sqlite::stmt_execution{stmt->get()};
+    sqlite::eval0(dpp, reset);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zone insert failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::foreign_key_constraint) {
+      return -EINVAL; // refers to nonexistent RealmID
+    } else if (e.code() == sqlite::errc::primary_key_constraint) {
+      return -EEXIST; // ID already taken
+    } else if (e.code() == sqlite::errc::unique_constraint) {
+      return -EEXIST; // Name already taken
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<SQLiteZoneWriter>(
+        impl.get(), ver, std::move(tag), info.id, info.name);
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::read_zone_by_id(const DoutPrefixProvider* dpp,
+                                       optional_yield y,
+                                       std::string_view zone_id,
+                                       RGWZoneParams& info,
+                                       std::unique_ptr<sal::ZoneWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_zone_by_id "}; dpp = &prefix;
+
+  if (zone_id.empty()) {
+    ldpp_dout(dpp, 0) << "requires a zone id" << dendl;
+    return -EINVAL;
+  }
+
+  ZoneRow row;
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["zone_sel_id"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::zone_select_id1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, zone_id);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    read_zone_row(reset, row);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  info = std::move(row.info);
+  if (writer) {
+    *writer = std::make_unique<SQLiteZoneWriter>(
+        impl.get(), row.ver, std::move(row.tag), info.id, info.name);
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::read_zone_by_name(const DoutPrefixProvider* dpp,
+                                         optional_yield y,
+                                         std::string_view zone_name,
+                                         RGWZoneParams& info,
+                                         std::unique_ptr<sal::ZoneWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_zone_by_name "}; dpp = &prefix;
+
+  if (zone_name.empty()) {
+    ldpp_dout(dpp, 0) << "requires a zone name" << dendl;
+    return -EINVAL;
+  }
+
+  ZoneRow row;
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["zone_sel_name"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::zone_select_name1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, zone_name);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    read_zone_row(reset, row);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  info = std::move(row.info);
+  if (writer) {
+    *writer = std::make_unique<SQLiteZoneWriter>(
+        impl.get(), row.ver, std::move(row.tag), info.id, info.name);
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::read_default_zone(const DoutPrefixProvider* dpp,
+                                         optional_yield y,
+                                         std::string_view realm_id,
+                                         RGWZoneParams& info,
+                                         std::unique_ptr<sal::ZoneWriter>* writer)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zone "}; dpp = &prefix;
+
+  ZoneRow row;
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["zone_sel_def"];
+    if (!stmt) {
+      static constexpr std::string_view sql = schema::zone_select_default0;
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    read_zone_row(reset, row);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+
+  info = std::move(row.info);
+  if (writer) {
+    *writer = std::make_unique<SQLiteZoneWriter>(
+        impl.get(), row.ver, std::move(row.tag), info.id, info.name);
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::list_zone_names(const DoutPrefixProvider* dpp,
+                                       optional_yield y,
+                                       const std::string& marker,
+                                       std::span<std::string> entries,
+                                       sal::ListResult<std::string>& result)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:list_zone_names "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["zone_sel_names"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::zone_select_names2, P1, P2);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, marker);
+    sqlite::bind_int(dpp, binding, P2, entries.size());
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    read_text_rows(dpp, reset, entries, result);
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+
+// PeriodConfig
+
+int SQLiteConfigStore::read_period_config(const DoutPrefixProvider* dpp,
+                                          optional_yield y,
+                                          std::string_view realm_id,
+                                          RGWPeriodConfig& info)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:read_period_config "}; dpp = &prefix;
+
+  try {
+    auto conn = impl->get(dpp);
+    auto& stmt = conn->statements["period_conf_sel"];
+    if (!stmt) {
+      const std::string sql = fmt::format(schema::period_config_select1, P1);
+      stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+    }
+    auto binding = sqlite::stmt_binding{stmt.get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+
+    auto reset = sqlite::stmt_execution{stmt.get()};
+    sqlite::eval1(dpp, reset);
+
+    std::string data = sqlite::column_text(reset, 0);
+    bufferlist bl = bufferlist::static_from_string(data);
+    auto p = bl.cbegin();
+    decode(info, p);
+
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "period config decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "period config select failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::done) {
+      return -ENOENT;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+int SQLiteConfigStore::write_period_config(const DoutPrefixProvider* dpp,
+                                           optional_yield y, bool exclusive,
+                                           std::string_view realm_id,
+                                           const RGWPeriodConfig& info)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:write_period_config "}; dpp = &prefix;
+
+  bufferlist bl;
+  encode(info, bl);
+  const auto data = std::string_view{bl.c_str(), bl.length()};
+
+  try {
+    auto conn = impl->get(dpp);
+    sqlite::stmt_ptr* stmt = nullptr;
+    if (exclusive) {
+      stmt = &conn->statements["period_conf_ins"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::period_config_insert2, P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    } else {
+      stmt = &conn->statements["period_conf_ups"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::period_config_upsert2, P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    }
+    auto binding = sqlite::stmt_binding{stmt->get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+    sqlite::bind_text(dpp, binding, P2, data);
+
+    auto reset = sqlite::stmt_execution{stmt->get()};
+    sqlite::eval0(dpp, reset);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "period config decode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "period config insert failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::primary_key_constraint) {
+      return -EEXIST;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+namespace {
+
+int version_cb(void* user, int count, char** values, char** names)
+{
+  if (count != 1) {
+    return EINVAL;
+  }
+  std::string_view name = names[0];
+  if (name != "user_version") {
+    return EINVAL;
+  }
+  std::string_view value = values[0];
+  auto result = std::from_chars(value.begin(), value.end(),
+                                *reinterpret_cast<uint32_t*>(user));
+  if (result.ec != std::errc{}) {
+    return static_cast<int>(result.ec);
+  }
+  return 0;
+}
+
+void apply_schema_migrations(const DoutPrefixProvider* dpp, sqlite3* db)
+{
+  sqlite::execute(dpp, db, "PRAGMA foreign_keys = ON", nullptr, nullptr);
+
+  // initiate a transaction and read the current schema version
+  uint32_t version = 0;
+  sqlite::execute(dpp, db, "BEGIN; PRAGMA user_version", version_cb, &version);
+
+  const uint32_t initial_version = version;
+  ldpp_dout(dpp, 4) << "current schema version " << version << dendl;
+
+  // use the version as an index into schema::migrations
+  auto m = std::next(schema::migrations.begin(), version);
+
+  for (; m != schema::migrations.end(); ++m, ++version) {
+    try {
+      sqlite::execute(dpp, db, m->up, nullptr, nullptr);
+    } catch (const sqlite::error&) {
+      ldpp_dout(dpp, -1) << "ERROR: schema migration failed on v" << version
+          << ": " << m->description << dendl;
+      throw;
+    }
+  }
+
+  if (version > initial_version) {
+    // update the user_version and commit the transaction
+    const auto commit = fmt::format("PRAGMA user_version = {}; COMMIT", version);
+    sqlite::execute(dpp, db, commit.c_str(), nullptr, nullptr);
+
+    ldpp_dout(dpp, 4) << "upgraded database schema to version " << version << dendl;
+  } else {
+    // nothing to commit
+    sqlite::execute(dpp, db, "ROLLBACK", nullptr, nullptr);
+  }
+}
+
+} // anonymous namespace
+
+
+auto create_sqlite_store(const DoutPrefixProvider* dpp, const std::string& uri)
+  -> std::unique_ptr<config::SQLiteConfigStore>
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:create_sqlite_store "}; dpp = &prefix;
+
+  // build the connection pool
+  int flags = SQLITE_OPEN_CREATE | SQLITE_OPEN_URI | SQLITE_OPEN_READWRITE |
+      SQLITE_OPEN_NOMUTEX;
+  auto factory = sqlite::ConnectionFactory{uri, flags};
+
+  // sqlite does not support concurrent writers. we enforce this limitation by
+  // using a connection pool of size=1
+  static constexpr size_t max_connections = 1;
+  auto impl = std::make_unique<SQLiteImpl>(std::move(factory), max_connections);
+
+  // open a connection to apply schema migrations
+  auto conn = impl->get(dpp);
+  apply_schema_migrations(dpp, conn->db.get());
+
+  return std::make_unique<SQLiteConfigStore>(std::move(impl));
+}
+
+} // namespace rgw::dbstore::config
diff --git a/src/rgw/store/dbstore/config/sqlite.h b/src/rgw/store/dbstore/config/sqlite.h
new file mode 100644 (file)
index 0000000..d79e040
--- /dev/null
@@ -0,0 +1,172 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include "rgw_sal_config.h"
+
+class DoutPrefixProvider;
+
+namespace rgw::dbstore::config {
+
+struct SQLiteImpl;
+
+// sal::ConfigStore implementation whose state lives in a SQLiteImpl.
+// Every member function below overrides the sal::ConfigStore interface.
+class SQLiteConfigStore : public sal::ConfigStore {
+ public:
+  // takes ownership of the given SQLiteImpl
+  explicit SQLiteConfigStore(std::unique_ptr<SQLiteImpl> impl);
+  ~SQLiteConfigStore() override;
+
+  // default realm id
+  int write_default_realm_id(const DoutPrefixProvider* dpp,
+                             optional_yield y, bool exclusive,
+                             std::string_view realm_id) override;
+  int read_default_realm_id(const DoutPrefixProvider* dpp,
+                            optional_yield y,
+                            std::string& realm_id) override;
+  int delete_default_realm_id(const DoutPrefixProvider* dpp,
+                              optional_yield y) override;
+
+  // realms
+  int create_realm(const DoutPrefixProvider* dpp,
+                   optional_yield y, bool exclusive,
+                   const RGWRealm& info,
+                   std::unique_ptr<sal::RealmWriter>* writer) override;
+  int read_realm_by_id(const DoutPrefixProvider* dpp,
+                       optional_yield y,
+                       std::string_view realm_id,
+                       RGWRealm& info,
+                       std::unique_ptr<sal::RealmWriter>* writer) override;
+  int read_realm_by_name(const DoutPrefixProvider* dpp,
+                         optional_yield y,
+                         std::string_view realm_name,
+                         RGWRealm& info,
+                         std::unique_ptr<sal::RealmWriter>* writer) override;
+  int read_default_realm(const DoutPrefixProvider* dpp,
+                         optional_yield y,
+                         RGWRealm& info,
+                         std::unique_ptr<sal::RealmWriter>* writer) override;
+  int read_realm_id(const DoutPrefixProvider* dpp,
+                    optional_yield y, std::string_view realm_name,
+                    std::string& realm_id) override;
+  int realm_notify_new_period(const DoutPrefixProvider* dpp,
+                              optional_yield y,
+                              const RGWPeriod& period) override;
+  int list_realm_names(const DoutPrefixProvider* dpp,
+                       optional_yield y, const std::string& marker,
+                       std::span<std::string> entries,
+                       sal::ListResult<std::string>& result) override;
+
+  // periods
+  int create_period(const DoutPrefixProvider* dpp,
+                    optional_yield y, bool exclusive,
+                    const RGWPeriod& info) override;
+  int read_period(const DoutPrefixProvider* dpp,
+                  optional_yield y, std::string_view period_id,
+                  std::optional<uint32_t> epoch, RGWPeriod& info) override;
+  int delete_period(const DoutPrefixProvider* dpp,
+                    optional_yield y,
+                    std::string_view period_id) override;
+  int list_period_ids(const DoutPrefixProvider* dpp,
+                      optional_yield y, const std::string& marker,
+                      std::span<std::string> entries,
+                      sal::ListResult<std::string>& result) override;
+
+  // default zonegroup id, per realm
+  int write_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                 optional_yield y, bool exclusive,
+                                 std::string_view realm_id,
+                                 std::string_view zonegroup_id) override;
+  int read_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                optional_yield y,
+                                std::string_view realm_id,
+                                std::string& zonegroup_id) override;
+  int delete_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                  optional_yield y,
+                                  std::string_view realm_id) override;
+
+  // zonegroups
+  int create_zonegroup(const DoutPrefixProvider* dpp,
+                       optional_yield y, bool exclusive,
+                       const RGWZoneGroup& info,
+                       std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
+  int read_zonegroup_by_id(const DoutPrefixProvider* dpp,
+                           optional_yield y,
+                           std::string_view zonegroup_id,
+                           RGWZoneGroup& info,
+                           std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
+  int read_zonegroup_by_name(const DoutPrefixProvider* dpp,
+                             optional_yield y,
+                             std::string_view zonegroup_name,
+                             RGWZoneGroup& info,
+                             std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
+  int read_default_zonegroup(const DoutPrefixProvider* dpp,
+                             optional_yield y,
+                             std::string_view realm_id,
+                             RGWZoneGroup& info,
+                             std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
+  int list_zonegroup_names(const DoutPrefixProvider* dpp,
+                           optional_yield y, const std::string& marker,
+                           std::span<std::string> entries,
+                           sal::ListResult<std::string>& result) override;
+
+  // default zone id, per realm
+  int write_default_zone_id(const DoutPrefixProvider* dpp,
+                            optional_yield y, bool exclusive,
+                            std::string_view realm_id,
+                            std::string_view zone_id) override;
+  int read_default_zone_id(const DoutPrefixProvider* dpp,
+                           optional_yield y,
+                           std::string_view realm_id,
+                           std::string& zone_id) override;
+  int delete_default_zone_id(const DoutPrefixProvider* dpp,
+                             optional_yield y,
+                             std::string_view realm_id) override;
+
+  // zones
+  int create_zone(const DoutPrefixProvider* dpp,
+                  optional_yield y, bool exclusive,
+                  const RGWZoneParams& info,
+                  std::unique_ptr<sal::ZoneWriter>* writer) override;
+  int read_zone_by_id(const DoutPrefixProvider* dpp,
+                      optional_yield y,
+                      std::string_view zone_id,
+                      RGWZoneParams& info,
+                      std::unique_ptr<sal::ZoneWriter>* writer) override;
+  int read_zone_by_name(const DoutPrefixProvider* dpp,
+                        optional_yield y,
+                        std::string_view zone_name,
+                        RGWZoneParams& info,
+                        std::unique_ptr<sal::ZoneWriter>* writer) override;
+  int read_default_zone(const DoutPrefixProvider* dpp,
+                        optional_yield y,
+                        std::string_view realm_id,
+                        RGWZoneParams& info,
+                        std::unique_ptr<sal::ZoneWriter>* writer) override;
+  int list_zone_names(const DoutPrefixProvider* dpp,
+                      optional_yield y, const std::string& marker,
+                      std::span<std::string> entries,
+                      sal::ListResult<std::string>& result) override;
+
+  // period config, per realm
+  int read_period_config(const DoutPrefixProvider* dpp,
+                         optional_yield y,
+                         std::string_view realm_id,
+                         RGWPeriodConfig& info) override;
+  int write_period_config(const DoutPrefixProvider* dpp,
+                          optional_yield y, bool exclusive,
+                          std::string_view realm_id,
+                          const RGWPeriodConfig& info) override;
+
+ private:
+  // opaque implementation state, defined outside this header
+  std::unique_ptr<SQLiteImpl> impl;
+}; // SQLiteConfigStore
+
+
+// SQLiteConfigStore factory. The definition in sqlite.cc builds the
+// connection pool for the given uri and applies schema migrations before
+// returning the store.
+auto create_sqlite_store(const DoutPrefixProvider* dpp, const std::string& uri)
+  -> std::unique_ptr<config::SQLiteConfigStore>;
+
+} // namespace rgw::dbstore::config
diff --git a/src/rgw/store/dbstore/config/sqlite_schema.h b/src/rgw/store/dbstore/config/sqlite_schema.h
new file mode 100644 (file)
index 0000000..c8a8fce
--- /dev/null
@@ -0,0 +1,299 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <initializer_list>
+
+namespace rgw::dbstore::config::schema {
+
+// One step of the schema history. apply_schema_migrations() in sqlite.cc
+// executes 'up' and logs 'description' when that execution fails.
+struct Migration {
+  // human-readable description to help with debugging migration errors
+  const char* description = nullptr;
+  // series of sql statements to apply the schema migration
+  const char* up = nullptr;
+  // series of sql statements to undo the schema migration
+  const char* down = nullptr;
+};
+
+// Ordered list of schema migrations. apply_schema_migrations() in sqlite.cc
+// uses the database's PRAGMA user_version as an index into this list, applies
+// every entry from that index onward, and then stores the resulting count as
+// the new user_version - so an entry's position is its schema version.
+static constexpr std::initializer_list<Migration> migrations {{
+    .description = "create the initial ConfigStore tables",
+    .up = R"(
+CREATE TABLE IF NOT EXISTS Realms (
+  ID TEXT PRIMARY KEY NOT NULL,
+  Name TEXT UNIQUE NOT NULL,
+  CurrentPeriod TEXT,
+  Epoch INTEGER DEFAULT 0,
+  VersionNumber INTEGER,
+  VersionTag TEXT
+);
+CREATE TABLE IF NOT EXISTS Periods (
+  ID TEXT NOT NULL,
+  Epoch INTEGER DEFAULT 0,
+  RealmID TEXT NOT NULL REFERENCES Realms (ID),
+  Data TEXT NOT NULL,
+  PRIMARY KEY (ID, Epoch)
+);
+CREATE TABLE IF NOT EXISTS PeriodConfigs (
+  RealmID TEXT PRIMARY KEY NOT NULL REFERENCES Realms (ID),
+  Data TEXT NOT NULL
+);
+CREATE TABLE IF NOT EXISTS ZoneGroups (
+  ID TEXT PRIMARY KEY NOT NULL,
+  Name TEXT UNIQUE NOT NULL,
+  RealmID TEXT NOT NULL REFERENCES Realms (ID),
+  Data TEXT NOT NULL,
+  VersionNumber INTEGER,
+  VersionTag TEXT
+);
+CREATE TABLE IF NOT EXISTS Zones (
+  ID TEXT PRIMARY KEY NOT NULL,
+  Name TEXT UNIQUE NOT NULL,
+  RealmID TEXT NOT NULL REFERENCES Realms (ID),
+  Data TEXT NOT NULL,
+  VersionNumber INTEGER,
+  VersionTag TEXT
+);
+CREATE TABLE IF NOT EXISTS DefaultRealms (
+  ID TEXT,
+  Empty TEXT PRIMARY KEY
+);
+CREATE TABLE IF NOT EXISTS DefaultZoneGroups (
+  ID TEXT,
+  RealmID TEXT PRIMARY KEY REFERENCES Realms (ID)
+);
+CREATE TABLE IF NOT EXISTS DefaultZones (
+  ID TEXT,
+  RealmID TEXT PRIMARY KEY REFERENCES Realms (ID)
+);
+)",
+    .down = R"(
+DROP TABLE IF EXISTS Realms;
+DROP TABLE IF EXISTS Periods;
+DROP TABLE IF EXISTS PeriodConfigs;
+DROP TABLE IF EXISTS ZoneGroups;
+DROP TABLE IF EXISTS Zones;
+DROP TABLE IF EXISTS DefaultRealms;
+DROP TABLE IF EXISTS DefaultZoneGroups;
+DROP TABLE IF EXISTS DefaultZones;
+)"
+  }
+};
+
+
+// DefaultRealms
+
+// Statements on the DefaultRealms table. The numeric suffix of each name is
+// the number of distinct {} placeholders in the statement.
+// NOTE(review): the placeholders are presumably replaced with sqlite bind
+// parameter names via fmt::format, as the zone statements are in sqlite.cc -
+// the realm call sites are not visible here.
+
+// insert the default realm id; the row's Empty key is always ''
+static constexpr const char* default_realm_insert1 =
+"INSERT INTO DefaultRealms (ID, Empty) VALUES ({}, '')";
+
+// insert the default realm id, or overwrite the ID of the existing '' row
+static constexpr const char* default_realm_upsert1 =
+R"(INSERT INTO DefaultRealms (ID, Empty) VALUES ({0}, '')
+ON CONFLICT(Empty) DO UPDATE SET ID = {0})";
+
+// read the default realm id
+static constexpr const char* default_realm_select0 =
+"SELECT ID FROM DefaultRealms LIMIT 1";
+
+// delete every row of DefaultRealms
+static constexpr const char* default_realm_delete0 =
+"DELETE FROM DefaultRealms";
+
+
+// Realms
+
+// Statements on the Realms table. The numeric suffix of each name is the
+// number of distinct {} placeholders in the statement.
+
+// set CurrentPeriod and Epoch and increment VersionNumber, but only on the
+// row whose VersionNumber and VersionTag match the supplied values
+static constexpr const char* realm_update5 =
+"UPDATE Realms SET CurrentPeriod = {1}, Epoch = {2}, VersionNumber = {3} + 1 \
+WHERE ID = {0} AND VersionNumber = {3} AND VersionTag = {4}";
+
+// set Name and increment VersionNumber, with the same version match
+static constexpr const char* realm_rename4 =
+"UPDATE Realms SET Name = {1}, VersionNumber = {2} + 1 \
+WHERE ID = {0} AND VersionNumber = {2} AND VersionTag = {3}";
+
+// delete the row with the given ID, with the same version match
+static constexpr const char* realm_delete3 =
+"DELETE FROM Realms WHERE ID = {} AND VersionNumber = {} AND VersionTag = {}";
+
+// insert a new realm row
+static constexpr const char* realm_insert4 =
+"INSERT INTO Realms (ID, Name, VersionNumber, VersionTag) \
+VALUES ({}, {}, {}, {})";
+
+// insert a realm row, or overwrite Name and version of the row with that ID
+static constexpr const char* realm_upsert4 =
+"INSERT INTO Realms (ID, Name, VersionNumber, VersionTag) \
+VALUES ({0}, {1}, {2}, {3}) \
+ON CONFLICT(ID) DO UPDATE SET Name = {1}, \
+VersionNumber = {2}, VersionTag = {3}";
+
+// select the realm row with the given ID
+static constexpr const char* realm_select_id1 =
+"SELECT * FROM Realms WHERE ID = {} LIMIT 1";
+
+// select the realm row with the given Name
+static constexpr const char* realm_select_name1 =
+"SELECT * FROM Realms WHERE Name = {} LIMIT 1";
+
+// select the realm row whose ID appears in DefaultRealms
+static constexpr const char* realm_select_default0 =
+"SELECT r.* FROM Realms r \
+INNER JOIN DefaultRealms d \
+ON d.ID = r.ID LIMIT 1";
+
+// select realm names greater than the first parameter in ascending order,
+// limited to the count given by the second parameter
+static constexpr const char* realm_select_names2 =
+"SELECT Name FROM Realms WHERE Name > {} \
+ORDER BY Name ASC LIMIT {}";
+
+
+// Periods
+
+// Statements on the Periods table, whose primary key is (ID, Epoch). The
+// numeric suffix of each name is the number of distinct {} placeholders in
+// the statement.
+
+// insert a new period row
+static constexpr const char* period_insert4 =
+"INSERT INTO Periods (ID, Epoch, RealmID, Data) \
+VALUES ({}, {}, {}, {})";
+
+// insert a period row, or overwrite RealmID and Data of the row with the same
+// (ID, Epoch). the conflict target is spelled out because sqlite versions
+// before 3.35.0 reject DO UPDATE without one
+static constexpr const char* period_upsert4 =
+"INSERT INTO Periods (ID, Epoch, RealmID, Data) \
+VALUES ({0}, {1}, {2}, {3}) \
+ON CONFLICT (ID, Epoch) DO UPDATE SET RealmID = {2}, Data = {3}";
+
+// select the period row with the given ID and Epoch
+static constexpr const char* period_select_epoch2 =
+"SELECT * FROM Periods WHERE ID = {} AND Epoch = {} LIMIT 1";
+
+// select the row with the highest Epoch for the given ID
+static constexpr const char* period_select_latest1 =
+"SELECT * FROM Periods WHERE ID = {} ORDER BY Epoch DESC LIMIT 1";
+
+// delete every epoch of the given period ID
+static constexpr const char* period_delete1 =
+"DELETE FROM Periods WHERE ID = {}";
+
+// select period ids greater than the first parameter in ascending order,
+// limited to the count given by the second parameter
+static constexpr const char* period_select_ids2 =
+"SELECT ID FROM Periods WHERE ID > {} ORDER BY ID ASC LIMIT {}";
+
+
+// DefaultZoneGroups
+
+// Statements on the DefaultZoneGroups table, which is keyed by RealmID. The
+// numeric suffix of each name is the number of distinct {} placeholders in
+// the statement.
+
+// insert the default zonegroup id for a realm
+static constexpr const char* default_zonegroup_insert2 =
+"INSERT INTO DefaultZoneGroups (RealmID, ID) VALUES ({}, {})";
+
+// insert the default zonegroup id for a realm, or overwrite the existing one
+static constexpr const char* default_zonegroup_upsert2 =
+"INSERT INTO DefaultZoneGroups (RealmID, ID) \
+VALUES ({0}, {1}) \
+ON CONFLICT(RealmID) DO UPDATE SET ID = {1}";
+
+// read the default zonegroup id of a realm
+static constexpr const char* default_zonegroup_select1 =
+"SELECT ID FROM DefaultZoneGroups WHERE RealmID = {}";
+
+// delete the default zonegroup entry of a realm
+static constexpr const char* default_zonegroup_delete1 =
+"DELETE FROM DefaultZoneGroups WHERE RealmID = {}";
+
+
+// ZoneGroups
+
+// Statements on the ZoneGroups table. The numeric suffix of each name is the
+// number of distinct {} placeholders in the statement.
+
+// set RealmID and Data and increment VersionNumber, but only on the row whose
+// VersionNumber and VersionTag match the supplied values
+static constexpr const char* zonegroup_update5 =
+"UPDATE ZoneGroups SET RealmID = {1}, Data = {2}, VersionNumber = {3} + 1 \
+WHERE ID = {0} AND VersionNumber = {3} AND VersionTag = {4}";
+
+// set Name and increment VersionNumber, with the same version match
+static constexpr const char* zonegroup_rename4 =
+"UPDATE ZoneGroups SET Name = {1}, VersionNumber = {2} + 1 \
+WHERE ID = {0} AND VersionNumber = {2} AND VersionTag = {3}";
+
+// delete the row with the given ID, with the same version match
+static constexpr const char* zonegroup_delete3 =
+"DELETE FROM ZoneGroups WHERE ID = {} \
+AND VersionNumber = {} AND VersionTag = {}";
+
+// insert a new zonegroup row
+static constexpr const char* zonegroup_insert6 =
+"INSERT INTO ZoneGroups (ID, Name, RealmID, Data, VersionNumber, VersionTag) \
+VALUES ({}, {}, {}, {}, {}, {})";
+
+// insert a zonegroup row, or overwrite every other column of the row with
+// that ID
+static constexpr const char* zonegroup_upsert6 =
+"INSERT INTO ZoneGroups (ID, Name, RealmID, Data, VersionNumber, VersionTag) \
+VALUES ({0}, {1}, {2}, {3}, {4}, {5}) \
+ON CONFLICT (ID) DO UPDATE SET Name = {1}, RealmID = {2}, \
+Data = {3}, VersionNumber = {4}, VersionTag = {5}";
+
+// select the zonegroup row with the given ID
+static constexpr const char* zonegroup_select_id1 =
+"SELECT * FROM ZoneGroups WHERE ID = {} LIMIT 1";
+
+// select the zonegroup row with the given Name
+static constexpr const char* zonegroup_select_name1 =
+"SELECT * FROM ZoneGroups WHERE Name = {} LIMIT 1";
+
+// select a zonegroup row whose ID appears in DefaultZoneGroups.
+// NOTE(review): there is no RealmID filter, so with defaults recorded for
+// several realms the row picked by LIMIT 1 is unspecified - confirm intended
+static constexpr const char* zonegroup_select_default0 =
+"SELECT z.* FROM ZoneGroups z \
+INNER JOIN DefaultZoneGroups d \
+ON d.ID = z.ID LIMIT 1";
+
+// select zonegroup names greater than the first parameter in ascending
+// order, limited to the count given by the second parameter
+static constexpr const char* zonegroup_select_names2 =
+"SELECT Name FROM ZoneGroups WHERE Name > {} \
+ORDER BY Name ASC LIMIT {}";
+
+
+// DefaultZones
+
+// Statements on the DefaultZones table, which is keyed by RealmID. The
+// numeric suffix of each name is the number of distinct {} placeholders in
+// the statement. sqlite.cc fills the placeholders with bind parameter names
+// (P1, ...) through fmt::format before preparing the statement.
+
+// insert the default zone id for a realm
+static constexpr const char* default_zone_insert2 =
+"INSERT INTO DefaultZones (RealmID, ID) VALUES ({}, {})";
+
+// insert the default zone id for a realm, or overwrite the existing one
+static constexpr const char* default_zone_upsert2 =
+"INSERT INTO DefaultZones (RealmID, ID) VALUES ({0}, {1}) \
+ON CONFLICT(RealmID) DO UPDATE SET ID = {1}";
+
+// read the default zone id of a realm
+static constexpr const char* default_zone_select1 =
+"SELECT ID FROM DefaultZones WHERE RealmID = {}";
+
+// delete the default zone entry of a realm
+static constexpr const char* default_zone_delete1 =
+"DELETE FROM DefaultZones WHERE RealmID = {}";
+
+
+// Zones
+
+// Statements on the Zones table. The numeric suffix of each name is the
+// number of distinct {} placeholders in the statement. sqlite.cc fills the
+// placeholders with bind parameter names (P1, ...) through fmt::format
+// before preparing the statement.
+
+// set RealmID and Data and increment VersionNumber, but only on the row whose
+// VersionNumber and VersionTag match the supplied values
+static constexpr const char* zone_update5 =
+"UPDATE Zones SET RealmID = {1}, Data = {2}, VersionNumber = {3} + 1 \
+WHERE ID = {0} AND VersionNumber = {3} AND VersionTag = {4}";
+
+// set Name and increment VersionNumber, with the same version match
+static constexpr const char* zone_rename4 =
+"UPDATE Zones SET Name = {1}, VersionNumber = {2} + 1 \
+WHERE ID = {0} AND VersionNumber = {2} AND VersionTag = {3}";
+
+// delete the row with the given ID, with the same version match
+static constexpr const char* zone_delete3 =
+"DELETE FROM Zones WHERE ID = {} AND VersionNumber = {} AND VersionTag = {}";
+
+// insert a new zone row
+static constexpr const char* zone_insert6 =
+"INSERT INTO Zones (ID, Name, RealmID, Data, VersionNumber, VersionTag) \
+VALUES ({}, {}, {}, {}, {}, {})";
+
+// insert a zone row, or overwrite every other column of the row with that ID
+static constexpr const char* zone_upsert6 =
+"INSERT INTO Zones (ID, Name, RealmID, Data, VersionNumber, VersionTag) \
+VALUES ({0}, {1}, {2}, {3}, {4}, {5}) \
+ON CONFLICT (ID) DO UPDATE SET Name = {1}, RealmID = {2}, \
+Data = {3}, VersionNumber = {4}, VersionTag = {5}";
+
+// select the zone row with the given ID
+static constexpr const char* zone_select_id1 =
+"SELECT * FROM Zones WHERE ID = {} LIMIT 1";
+
+// select the zone row with the given Name
+static constexpr const char* zone_select_name1 =
+"SELECT * FROM Zones WHERE Name = {} LIMIT 1";
+
+// select a zone row whose ID appears in DefaultZones.
+// NOTE(review): there is no RealmID filter, so with defaults recorded for
+// several realms the row picked by LIMIT 1 is unspecified - confirm intended
+static constexpr const char* zone_select_default0 =
+"SELECT z.* FROM Zones z \
+INNER JOIN DefaultZones d \
+ON d.ID = z.ID LIMIT 1";
+
+// select zone names greater than the first parameter in ascending order,
+// limited to the count given by the second parameter
+static constexpr const char* zone_select_names2 =
+"SELECT Name FROM Zones WHERE Name > {} \
+ORDER BY Name ASC LIMIT {}";
+
+
+// PeriodConfigs
+
+// Statements on the PeriodConfigs table, which is keyed by RealmID. The
+// numeric suffix of each name is the number of distinct {} placeholders in
+// the statement. sqlite.cc fills the placeholders with bind parameter names
+// (P1, ...) through fmt::format before preparing the statement.
+
+// insert the period config of a realm
+static constexpr const char* period_config_insert2 =
+"INSERT INTO PeriodConfigs (RealmID, Data) VALUES ({}, {})";
+
+// insert the period config of a realm, or overwrite the existing Data
+static constexpr const char* period_config_upsert2 =
+"INSERT INTO PeriodConfigs (RealmID, Data) VALUES ({0}, {1}) \
+ON CONFLICT (RealmID) DO UPDATE SET Data = {1}";
+
+// read the period config Data of a realm
+static constexpr const char* period_config_select1 =
+"SELECT Data FROM PeriodConfigs WHERE RealmID = {} LIMIT 1";
+
+} // namespace rgw::dbstore::config::schema
diff --git a/src/rgw/store/dbstore/config/store.cc b/src/rgw/store/dbstore/config/store.cc
new file mode 100644 (file)
index 0000000..66f7471
--- /dev/null
@@ -0,0 +1,40 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <stdexcept>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "store.h"
+#ifdef SQLITE_ENABLED
+#include "sqlite.h"
+#endif
+
+namespace rgw::dbstore {
+
+// create the ConfigStore backend selected by the uri. when built with
+// SQLITE_ENABLED, a uri that starts with "file:" is handed to the sqlite
+// backend. any uri that no backend claims results in std::runtime_error
+auto create_config_store(const DoutPrefixProvider* dpp, const std::string& uri)
+  -> std::unique_ptr<sal::ConfigStore>
+{
+#ifdef SQLITE_ENABLED
+  const bool is_sqlite_uri = uri.starts_with("file:");
+  if (is_sqlite_uri) {
+    return config::create_sqlite_store(dpp, uri);
+  }
+#endif
+  // no backend recognized the uri
+  const std::string message = fmt::format("unrecognized URI {}", uri);
+  throw std::runtime_error(message);
+}
+
+} // namespace rgw::dbstore
diff --git a/src/rgw/store/dbstore/config/store.h b/src/rgw/store/dbstore/config/store.h
new file mode 100644 (file)
index 0000000..553d9f7
--- /dev/null
@@ -0,0 +1,27 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <memory>
+#include "rgw_sal_config.h"
+
+namespace rgw::dbstore {
+
+// ConfigStore factory. the backend is chosen from the uri: when built with
+// SQLITE_ENABLED, a uri that starts with "file:" creates the sqlite store.
+// throws std::runtime_error for a uri that no backend recognizes
+auto create_config_store(const DoutPrefixProvider* dpp, const std::string& uri)
+  -> std::unique_ptr<sal::ConfigStore>;
+
+} // namespace rgw::dbstore
diff --git a/src/rgw/store/dbstore/sqlite/connection.cc b/src/rgw/store/dbstore/sqlite/connection.cc
new file mode 100644 (file)
index 0000000..143a3a0
--- /dev/null
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/dout.h"
+#include "connection.h"
+#include "error.h"
+
+namespace rgw::dbstore::sqlite {
+
+// open the sqlite database named by filename, passing flags through to
+// sqlite3_open_v2(), and enable extended result codes on the new connection.
+//
+// returns an owning handle. throws std::system_error in
+// sqlite::error_category() with the sqlite3_open_v2() result if the open fails
+db_ptr open_database(const char* filename, int flags)
+{
+  sqlite3* handle = nullptr;
+  const int result = ::sqlite3_open_v2(filename, &handle, flags, nullptr);
+  // sqlite3_open_v2() normally hands back a connection handle even when it
+  // fails, and that handle still has to be closed. take ownership before
+  // checking the result so the error path does not leak it
+  auto db = db_ptr{handle};
+  if (result != SQLITE_OK) {
+    throw std::system_error(result, sqlite::error_category());
+  }
+  // request extended result codes
+  (void) ::sqlite3_extended_result_codes(db.get(), 1);
+  return db;
+}
+
+} // namespace rgw::dbstore::sqlite
diff --git a/src/rgw/store/dbstore/sqlite/connection.h b/src/rgw/store/dbstore/sqlite/connection.h
new file mode 100644 (file)
index 0000000..f5cd77d
--- /dev/null
@@ -0,0 +1,66 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include <sqlite3.h>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "sqlite/statement.h"
+
+class DoutPrefixProvider;
+
+namespace rgw::dbstore::sqlite {
+
+// owning sqlite3 pointer. the deleter closes the connection with
+// sqlite3_close() and ignores its return value
+struct db_deleter {
+  void operator()(sqlite3* p) const { ::sqlite3_close(p); }
+};
+using db_ptr = std::unique_ptr<sqlite3, db_deleter>;
+
+
+// open the database file or throw on error. flags are passed through to
+// sqlite3_open_v2(), and extended result codes are enabled on the returned
+// connection. failure is reported as std::system_error carrying the sqlite
+// result code in sqlite::error_category()
+db_ptr open_database(const char* filename, int flags);
+
+
+// a database handle together with the statements prepared on it
+struct Connection {
+  // declared before 'statements' so that it is destroyed after them: every
+  // prepared statement is finalized before the database handle is closed
+  db_ptr db;
+  // map of statements, prepared on first use
+  // NOTE(review): the keys are non-owning string_views; presumably they refer
+  // to strings that outlive the Connection -- confirm against the callers
+  std::map<std::string_view, stmt_ptr> statements;
+
+  // take ownership of an open database handle. starts with no statements
+  explicit Connection(db_ptr db) : db(std::move(db)) {}
+};
+
+// callable that opens new sqlite Connections for ConnectionPool. every call
+// opens the stored uri again with the stored sqlite3_open_v2() flags
+class ConnectionFactory {
+  std::string database_uri;
+  int open_flags;
+ public:
+  ConnectionFactory(std::string uri, int flags)
+      : database_uri(std::move(uri)), open_flags(flags) {}
+
+  // open the database and wrap it in a Connection that has no prepared
+  // statements yet. dpp is not used here. exceptions from open_database()
+  // propagate to the caller
+  auto operator()(const DoutPrefixProvider* dpp)
+    -> std::unique_ptr<Connection>
+  {
+    return std::make_unique<Connection>(
+        open_database(database_uri.c_str(), open_flags));
+  }
+};
+
+} // namespace rgw::dbstore::sqlite
diff --git a/src/rgw/store/dbstore/sqlite/error.cc b/src/rgw/store/dbstore/sqlite/error.cc
new file mode 100644 (file)
index 0000000..5fe9eb0
--- /dev/null
@@ -0,0 +1,37 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "error.h"
+
+namespace rgw::dbstore::sqlite {
+
+// return the singleton std::error_category for sqlite result codes.
+//
+// message() is sqlite3_errstr() for the code. default_error_condition() maps
+// a code to the condition of its primary result code (the low 8 bits) in this
+// same category, which is what lets an extended error_code compare equal to
+// the error_condition of its primary code
+const std::error_category& error_category()
+{
+  struct category : std::error_category {
+    const char* name() const noexcept override {
+      return "dbstore:sqlite";
+    }
+    std::string message(int ev) const override {
+      return ::sqlite3_errstr(ev);
+    }
+    std::error_condition default_error_condition(int code) const noexcept override {
+      // refer to *this, the singleton. inside this struct 'category()' names
+      // the struct itself and would construct a temporary, leaving the
+      // returned condition with a dangling category that never compares equal
+      // to the singleton
+      return {code & 0xFF, *this};
+    }
+  };
+  static category instance;
+  return instance;
+}
+
+} // namespace rgw::dbstore::sqlite
diff --git a/src/rgw/store/dbstore/sqlite/error.h b/src/rgw/store/dbstore/sqlite/error.h
new file mode 100644 (file)
index 0000000..15396d8
--- /dev/null
@@ -0,0 +1,81 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <system_error>
+#include <sqlite3.h>
+
+namespace rgw::dbstore::sqlite {
+
+// error category for sqlite extended result codes:
+//   https://www.sqlite.org/rescode.html
+// returns a reference to a single shared category object, so error_codes and
+// error_conditions built from it can be compared with each other
+const std::error_category& error_category();
+
+
+// exception type for sqlite failures. what() returns the error message, and
+// code() returns the result code as a std::error_code
+class error : public std::runtime_error {
+  std::error_code errcode;
+ public:
+  // use the given message and error code as they are
+  error(const char* errmsg, std::error_code ec)
+      : std::runtime_error(errmsg), errcode(ec) {}
+
+  // take the message from sqlite3_errmsg() on the given connection
+  error(sqlite3* db, std::error_code ec)
+      : error(::sqlite3_errmsg(db), ec) {}
+
+  // as above, with a raw result code placed in error_category()
+  error(sqlite3* db, int result)
+      : error(db, std::error_code{result, error_category()}) {}
+
+  // take both the message and the extended result code from the connection
+  error(sqlite3* db)
+      : error(db, ::sqlite3_extended_errcode(db)) {}
+
+  std::error_code code() const { return errcode; }
+};
+
+
+// sqlite error conditions for primary and extended result codes
+//
+// 'primary' error_conditions will match 'primary' error_codes as well as any
+// 'extended' error_codes whose lowest 8 bits match that primary code. for
+// example, the error_condition for SQLITE_CONSTRAINT will match the error_codes
+// SQLITE_CONSTRAINT and SQLITE_CONSTRAINT_*
+//
+// errc is registered as an error condition enum at the bottom of this header,
+// so its values convert implicitly to std::error_condition in comparisons
+// such as 'ec == errc::busy'. each enumerator has the numeric value of the
+// sqlite constant it is initialized from
+enum class errc {
+  // primary result codes
+  ok = SQLITE_OK,
+  busy = SQLITE_BUSY,
+  constraint = SQLITE_CONSTRAINT,
+  row = SQLITE_ROW,
+  done = SQLITE_DONE,
+
+  // extended result codes
+  primary_key_constraint = SQLITE_CONSTRAINT_PRIMARYKEY,
+  foreign_key_constraint = SQLITE_CONSTRAINT_FOREIGNKEY,
+  unique_constraint = SQLITE_CONSTRAINT_UNIQUE,
+
+  // ..add conditions as needed
+};
+
+// build a std::error_code in error_category() from an errc value
+inline std::error_code make_error_code(errc e)
+{
+  const auto value = static_cast<int>(e);
+  return std::error_code{value, error_category()};
+}
+
+// build a std::error_condition in error_category() from an errc value
+inline std::error_condition make_error_condition(errc e)
+{
+  const auto value = static_cast<int>(e);
+  return std::error_condition{value, error_category()};
+}
+
+} // namespace rgw::dbstore::sqlite
+
+namespace std {
+
+// register sqlite::errc as an error condition enum, which enables its
+// implicit conversion to std::error_condition
+template<>
+struct is_error_condition_enum<rgw::dbstore::sqlite::errc> : true_type {};
+
+} // namespace std
diff --git a/src/rgw/store/dbstore/sqlite/statement.cc b/src/rgw/store/dbstore/sqlite/statement.cc
new file mode 100644 (file)
index 0000000..dcf7dba
--- /dev/null
@@ -0,0 +1,196 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/dout.h"
+#include "error.h"
+#include "statement.h"
+
+#define dout_subsys ceph_subsys_rgw_dbstore
+
+namespace rgw::dbstore::sqlite {
+
+// owning pointer to arbitrary memory allocated and returned by sqlite3. the
+// deleter's call operator is a template, so the same deleter type releases
+// any pointee type through sqlite3_free()
+struct sqlite_deleter {
+  template <typename T>
+  void operator()(T* p) { ::sqlite3_free(p); }
+};
+template <typename T>
+using sqlite_ptr = std::unique_ptr<T, sqlite_deleter>;
+
+
+// compile sql into a prepared statement on db and return an owning pointer.
+//
+// on failure, logs the sqlite error message and the statement text at level 1
+// and throws sqlite::error with that message and the result code
+stmt_ptr prepare_statement(const DoutPrefixProvider* dpp,
+                           sqlite3* db, std::string_view sql)
+{
+  sqlite3_stmt* prepared = nullptr;
+  const int rc = ::sqlite3_prepare_v2(db, sql.data(), sql.size(),
+                                      &prepared, nullptr);
+  const std::error_code ec{rc, sqlite::error_category()};
+  if (ec == sqlite::errc::ok) {
+    return stmt_ptr{prepared};
+  }
+
+  const char* errmsg = ::sqlite3_errmsg(db);
+  ldpp_dout(dpp, 1) << "preparation failed: " << errmsg
+      << " (" << ec << ")\nstatement: " << sql << dendl;
+  throw sqlite::error(errmsg, ec);
+}
+
+// look up the index of the named parameter in the prepared statement.
+//
+// returns the (positive) parameter index. if the statement has no parameter
+// with that name, logs at level 1 and throws sqlite::error with SQLITE_RANGE
+static int bind_index(const DoutPrefixProvider* dpp,
+                      const stmt_binding& stmt, const char* name)
+{
+  const int index = ::sqlite3_bind_parameter_index(stmt.get(), name);
+  if (index <= 0) {
+    ldpp_dout(dpp, 1) << "binding failed on parameter name="
+        << name << dendl;
+    // a failed lookup only returns 0; it does not record an error on the
+    // connection, so sqlite3_errmsg()/sqlite3_extended_errcode() would report
+    // the outcome of some unrelated earlier call. throw the code that the
+    // sqlite3_bind_*() functions use for an invalid parameter index instead
+    throw sqlite::error("no such parameter name in statement",
+                        std::error_code{SQLITE_RANGE, sqlite::error_category()});
+  }
+  return index;
+}
+
+// bind value as the text of the named parameter.
+//
+// the text is bound with SQLITE_STATIC, so sqlite keeps the pointer instead of
+// copying the bytes. on failure, logs the name and value at level 1 and throws
+// sqlite::error built from the statement's connection and the result code
+void bind_text(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
+               const char* name, std::string_view value)
+{
+  const int position = bind_index(dpp, stmt, name);
+
+  sqlite3_stmt* const handle = stmt.get();
+  const int rc = ::sqlite3_bind_text(handle, position, value.data(),
+                                     value.size(), SQLITE_STATIC);
+  const std::error_code ec{rc, sqlite::error_category()};
+  if (ec == sqlite::errc::ok) {
+    return;
+  }
+
+  ldpp_dout(dpp, 1) << "binding failed on parameter name="
+      << name << " value=" << value << dendl;
+  throw sqlite::error(::sqlite3_db_handle(handle), ec);
+}
+
+// bind value as the integer of the named parameter.
+//
+// on failure, logs the name and value at level 1 and throws sqlite::error
+// built from the statement's connection and the result code
+void bind_int(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
+              const char* name, int value)
+{
+  const int position = bind_index(dpp, stmt, name);
+
+  sqlite3_stmt* const handle = stmt.get();
+  const int rc = ::sqlite3_bind_int(handle, position, value);
+  const std::error_code ec{rc, sqlite::error_category()};
+  if (ec == sqlite::errc::ok) {
+    return;
+  }
+
+  ldpp_dout(dpp, 1) << "binding failed on parameter name="
+      << name << " value=" << value << dendl;
+  throw sqlite::error(::sqlite3_db_handle(handle), ec);
+}
+
+// step the statement once, expecting it to finish without producing a row.
+//
+// any step result other than 'done' is logged at level 20 and thrown as
+// sqlite::error with the connection's error message and the result code
+void eval0(const DoutPrefixProvider* dpp, const stmt_execution& stmt)
+{
+  // the expanded sql is only needed for log output, so only build it when
+  // level 20 is gathered for this subsystem
+  sqlite_ptr<char> sql;
+  if (dpp->get_cct()->_conf->subsys.should_gather<dout_subsys, 20>()) {
+    sql.reset(::sqlite3_expanded_sql(stmt.get()));
+  }
+  // statement text for log output. 'sql' is null when the expanded text was
+  // not captured above or sqlite3_expanded_sql() itself returned NULL, and a
+  // null char* must never be streamed. fall back to the unexpanded text
+  const auto sql_text = [&sql, &stmt] () -> const char* {
+    if (sql) {
+      return sql.get();
+    }
+    const char* unexpanded = ::sqlite3_sql(stmt.get());
+    return unexpanded ? unexpanded : "";
+  };
+
+  const int result = ::sqlite3_step(stmt.get());
+  auto ec = std::error_code{result, sqlite::error_category()};
+  sqlite3* db = ::sqlite3_db_handle(stmt.get());
+
+  if (ec != sqlite::errc::done) {
+    const char* errmsg = ::sqlite3_errmsg(db);
+    ldpp_dout(dpp, 20) << "evaluation failed: " << errmsg
+        << " (" << ec << ")\nstatement: " << sql_text() << dendl;
+    throw sqlite::error(errmsg, ec);
+  }
+  ldpp_dout(dpp, 20) << "evaluation succeeded: " << sql_text() << dendl;
+}
+
+// step the statement once, expecting it to produce a result row.
+//
+// any step result other than 'row' is logged at level 1 and thrown as
+// sqlite::error with the connection's error message and the result code
+void eval1(const DoutPrefixProvider* dpp, const stmt_execution& stmt)
+{
+  // the expanded sql is only needed for log output, so only build it when
+  // level 20 is gathered for this subsystem
+  sqlite_ptr<char> sql;
+  if (dpp->get_cct()->_conf->subsys.should_gather<dout_subsys, 20>()) {
+    sql.reset(::sqlite3_expanded_sql(stmt.get()));
+  }
+  // statement text for log output. the failure below is logged at level 1,
+  // where 'sql' is normally still null, and a null char* must never be
+  // streamed. fall back to the unexpanded text of the statement
+  const auto sql_text = [&sql, &stmt] () -> const char* {
+    if (sql) {
+      return sql.get();
+    }
+    const char* unexpanded = ::sqlite3_sql(stmt.get());
+    return unexpanded ? unexpanded : "";
+  };
+
+  const int result = ::sqlite3_step(stmt.get());
+  auto ec = std::error_code{result, sqlite::error_category()};
+  if (ec != sqlite::errc::row) {
+    sqlite3* db = ::sqlite3_db_handle(stmt.get());
+    const char* errmsg = ::sqlite3_errmsg(db);
+    ldpp_dout(dpp, 1) << "evaluation failed: " << errmsg << " (" << ec
+        << ")\nstatement: " << sql_text() << dendl;
+    throw sqlite::error(errmsg, ec);
+  }
+  ldpp_dout(dpp, 20) << "evaluation succeeded: " << sql_text() << dendl;
+}
+
+// read the given column of the statement's current row as an int
+int column_int(const stmt_execution& stmt, int column)
+{
+  const int value = ::sqlite3_column_int(stmt.get(), column);
+  return value;
+}
+
+// copy the given column of the statement's current row into a string. a NULL
+// text pointer from sqlite yields an empty string
+std::string column_text(const stmt_execution& stmt, int column)
+{
+  const unsigned char* bytes = ::sqlite3_column_text(stmt.get(), column);
+  if (bytes == nullptr) {
+    return std::string{};
+  }
+  // the size is requested after the text pointer, and covers exactly the
+  // bytes that are copied
+  const std::size_t length = ::sqlite3_column_bytes(stmt.get(), column);
+  return std::string(reinterpret_cast<const char*>(bytes), length);
+}
+
+// step the statement and copy column 0 of each result row into entries, in
+// order, until the statement is done or entries is full.
+//
+// returns the leading sub-span of entries that was filled. a step result
+// other than 'row' or 'done' is logged at level 1 and thrown as sqlite::error
+// with the connection's error message and the result code
+auto read_text_rows(const DoutPrefixProvider* dpp,
+                    const stmt_execution& stmt,
+                    std::span<std::string> entries)
+  -> std::span<std::string>
+{
+  // the expanded sql is only needed for log output, so only build it when
+  // level 20 is gathered for this subsystem
+  sqlite_ptr<char> sql;
+  if (dpp->get_cct()->_conf->subsys.should_gather<dout_subsys, 20>()) {
+    sql.reset(::sqlite3_expanded_sql(stmt.get()));
+  }
+  // statement text for log output. the failure below is logged at level 1,
+  // where 'sql' is normally still null, and a null char* must never be
+  // streamed. fall back to the unexpanded text of the statement
+  const auto sql_text = [&sql, &stmt] () -> const char* {
+    if (sql) {
+      return sql.get();
+    }
+    const char* unexpanded = ::sqlite3_sql(stmt.get());
+    return unexpanded ? unexpanded : "";
+  };
+
+  std::size_t count = 0;
+  while (count < entries.size()) {
+    const int result = ::sqlite3_step(stmt.get());
+    auto ec = std::error_code{result, sqlite::error_category()};
+    if (ec == sqlite::errc::done) {
+      break;
+    }
+    if (ec != sqlite::errc::row) {
+      sqlite3* db = ::sqlite3_db_handle(stmt.get());
+      const char* errmsg = ::sqlite3_errmsg(db);
+      ldpp_dout(dpp, 1) << "evaluation failed: " << errmsg << " (" << ec
+          << ")\nstatement: " << sql_text() << dendl;
+      throw sqlite::error(errmsg, ec);
+    }
+    entries[count] = column_text(stmt, 0);
+    ++count;
+  }
+  ldpp_dout(dpp, 20) << "statement evaluation produced " << count
+      << " results: " << sql_text() << dendl;
+
+  return entries.first(count);
+}
+
+// run a raw query through sqlite3_exec(), passing callback and arg along.
+//
+// on failure, logs the error message and the query at level 1 and throws
+// sqlite::error with that message and the result code. success is logged at
+// level 20
+void execute(const DoutPrefixProvider* dpp, sqlite3* db, const char* query,
+             sqlite3_callback callback, void* arg)
+{
+  char* errmsg = nullptr;
+  const int rc = ::sqlite3_exec(db, query, callback, arg, &errmsg);
+  const std::error_code ec{rc, sqlite::error_category()};
+  // the message buffer comes from sqlite; release it when this function exits
+  sqlite_ptr<char> errmsg_owner{errmsg};
+  if (ec == sqlite::errc::ok) {
+    ldpp_dout(dpp, 20) << "query execution succeeded: " << query << dendl;
+    return;
+  }
+
+  ldpp_dout(dpp, 1) << "query execution failed: " << errmsg << " (" << ec
+      << ")\nquery: " << query << dendl;
+  throw sqlite::error(errmsg, ec);
+}
+
+} // namespace rgw::dbstore::sqlite
diff --git a/src/rgw/store/dbstore/sqlite/statement.h b/src/rgw/store/dbstore/sqlite/statement.h
new file mode 100644 (file)
index 0000000..98b4acf
--- /dev/null
@@ -0,0 +1,83 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <memory>
+#include <span>
+#include <string>
+
+#include <sqlite3.h>
+
+class DoutPrefixProvider;
+
+namespace rgw::dbstore::sqlite {
+
+// owning sqlite3_stmt pointer. the deleter destroys the prepared statement
+// with sqlite3_finalize() and ignores its return value
+struct stmt_deleter {
+  void operator()(sqlite3_stmt* p) const { ::sqlite3_finalize(p); }
+};
+using stmt_ptr = std::unique_ptr<sqlite3_stmt, stmt_deleter>;
+
+// non-owning sqlite3_stmt pointer that clears binding state on destruction.
+// the deleter only calls sqlite3_clear_bindings(); the statement itself is
+// not finalized
+struct stmt_binding_deleter {
+  void operator()(sqlite3_stmt* p) const { ::sqlite3_clear_bindings(p); }
+};
+using stmt_binding = std::unique_ptr<sqlite3_stmt, stmt_binding_deleter>;
+
+// non-owning sqlite3_stmt pointer that clears execution state on destruction.
+// the deleter only calls sqlite3_reset(); the statement itself is not
+// finalized
+struct stmt_execution_deleter {
+  void operator()(sqlite3_stmt* p) const { ::sqlite3_reset(p); }
+};
+using stmt_execution = std::unique_ptr<sqlite3_stmt, stmt_execution_deleter>;
+
+
+// prepare the sql statement or throw on error. a failure is logged at level 1
+// and thrown as sqlite::error carrying the connection's error message and the
+// result code of sqlite3_prepare_v2()
+stmt_ptr prepare_statement(const DoutPrefixProvider* dpp,
+                           sqlite3* db, std::string_view sql);
+
+// bind an input string for the given parameter name. the text is bound with
+// SQLITE_STATIC, so sqlite keeps the pointer instead of copying the bytes.
+// throws sqlite::error if the statement has no parameter with that name or
+// if the bind itself fails
+void bind_text(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
+               const char* name, std::string_view value);
+
+// bind an input integer for the given parameter name. throws sqlite::error if
+// the statement has no parameter with that name or if the bind itself fails
+void bind_int(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
+              const char* name, int value);
+
+// evaluate a prepared statement, expecting no result rows. the statement is
+// stepped once, and any result other than 'done' (including a result row) is
+// thrown as sqlite::error
+void eval0(const DoutPrefixProvider* dpp, const stmt_execution& stmt);
+
+// evaluate a prepared statement, expecting a single result row. the statement
+// is stepped once, and any result other than 'row' (including 'done' with no
+// rows) is thrown as sqlite::error. it does not step again to check for
+// further rows
+void eval1(const DoutPrefixProvider* dpp, const stmt_execution& stmt);
+
+// return the given column as an integer, as converted by sqlite3_column_int()
+int column_int(const stmt_execution& stmt, int column);
+
+// return the given column as text, or an empty string on NULL. the bytes are
+// copied into the returned string
+std::string column_text(const stmt_execution& stmt, int column);
+
+// read the text column from each result row into the given entries, and return
+// the sub-span of entries that contain results. only column 0 of each row is
+// read. stepping stops once the statement is done or every entry has been
+// filled, so at most entries.size() rows are consumed. any step result other
+// than 'row' or 'done' is thrown as sqlite::error
+auto read_text_rows(const DoutPrefixProvider* dpp,
+                    const stmt_execution& stmt,
+                    std::span<std::string> entries)
+  -> std::span<std::string>;
+
+// execute a raw query without preparing a statement. the optional callback
+// can be used to read results. callback and arg are passed through to
+// sqlite3_exec(). a failure is logged at level 1 and thrown as sqlite::error
+// carrying the error message and result code from sqlite3_exec()
+void execute(const DoutPrefixProvider* dpp, sqlite3* db, const char* query,
+             sqlite3_callback callback, void* arg);
+
+} // namespace rgw::dbstore::sqlite