]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/rbd: add librbd_crimson Phase 1 skeleton wip-baum-20260225-01
authorAlexander Indenbaum <aindenba@redhat.com>
Mon, 23 Feb 2026 16:07:38 +0000 (18:07 +0200)
committerAlexander Indenbaum <aindenba@redhat.com>
Wed, 25 Feb 2026 09:06:35 +0000 (11:06 +0200)
Implement rbd_open, rbd_close, rbd_stat, rbd_get_size; cluster, image header
read (cls/rbd); name→id via rbd_id.<name>. Cluster/ioctx C API stubbed for Phase 6.

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
crimson/rbd: add Phase 2 async I/O - rbd_aio_read, rbd_aio_write

Add block-to-object mapping (extent_to_object_extents), completion API, and
rbd_aio_read/readv, rbd_aio_write/writev using IoCtx read/write.

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
crimson/rbd: add Phase 3 - rbd_aio_discard, rbd_aio_flush, rbd_aio_write_zeroes, rbd_flush

Implement UNMAP, FLUSH, WRITE_ZEROES; map discard/write_zeroes to IoCtx; flush is no-op (no client cache).

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
librbd_crimson: add crimson-rbd-demo integration test, fix build

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
crimson/rbd: implement cluster C API

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
crimson/rbd: add rbd_metadata_get, rbd_metadata_set, rbd_resize

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
crimson/rbd: add C API integration test, unit tests, and coverage matrix

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
crimson/rbd: add external-thread API for SPDK Phase A1 integration

Add C API for running Seastar reactors inside SPDK reactor threads:
- rbd_crimson_configure_external_threads(): configure before spdk_reactors_start()
- rbd_crimson_register_reactor(): register reactor on each SPDK reactor thread
- rbd_crimson_run_one_tick(): advance reactor once per loop iteration
- rbd_crimson_reactor_cleanup(): per-thread cleanup
- rbd_crimson_cleanup_all(): global cleanup after spdk_reactors_fini

Implement in api/external_thread.cc by wrapping Seastar app_template
and reactor APIs. Lets SPDK link librbd_crimson for Phase A1 without
depending on Seastar directly.

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
27 files changed:
ceph.spec.in
src/crimson/CMakeLists.txt
src/crimson/rbd/CMakeLists.txt [new file with mode: 0644]
src/crimson/rbd/api/cluster.cc [new file with mode: 0644]
src/crimson/rbd/api/crimson_rbd_impl.h [new file with mode: 0644]
src/crimson/rbd/api/crimson_rbd_internal.h [new file with mode: 0644]
src/crimson/rbd/api/external_thread.cc [new file with mode: 0644]
src/crimson/rbd/api/io.cc [new file with mode: 0644]
src/crimson/rbd/api/metadata.cc [new file with mode: 0644]
src/crimson/rbd/api/open.cc [new file with mode: 0644]
src/crimson/rbd/cluster.cc [new file with mode: 0644]
src/crimson/rbd/cluster.h [new file with mode: 0644]
src/crimson/rbd/completion.cc [new file with mode: 0644]
src/crimson/rbd/completion.h [new file with mode: 0644]
src/crimson/rbd/image.cc [new file with mode: 0644]
src/crimson/rbd/image.h [new file with mode: 0644]
src/crimson/rbd/image_header.cc [new file with mode: 0644]
src/crimson/rbd/image_header.h [new file with mode: 0644]
src/crimson/rbd/utils.cc [new file with mode: 0644]
src/crimson/rbd/utils.h [new file with mode: 0644]
src/crimson/tools/CMakeLists.txt
src/crimson/tools/rbd_demo.cc [new file with mode: 0644]
src/include/CMakeLists.txt
src/include/rbd/rbd_crimson.h [new file with mode: 0644]
src/seastar
src/test/crimson/CMakeLists.txt
src/test/crimson/test_rbd.cc [new file with mode: 0644]

index 40cf31e054b3f8243f3b093c19280294058838f4..a625cbd8642000863e5da8f76acf7543b87050c0 100644 (file)
@@ -2509,6 +2509,38 @@ fi
 %{_libdir}/librbd_tp.so
 %endif
 
+%if 0%{with crimson}
+%package -n librbd-crimson1
+Summary:       Crimson RBD client library (Seastar-native)
+%if 0%{?suse_version}
+Group:         System/Libraries
+%endif
+%description -n librbd-crimson1
+librbd-crimson is a Seastar-native RBD client library for the Ceph
+distributed storage system. Requires Crimson build (WITH_CRIMSON=ON).
+
+%package -n librbd-crimson-devel
+Summary:       Crimson RBD client headers
+%if 0%{?suse_version}
+Group:         Development/Libraries/C and C++
+%endif
+Requires:      librbd-crimson1 = %{_epoch_prefix}%{version}-%{release}
+Provides:      librbd-crimson1-devel = %{_epoch_prefix}%{version}-%{release}
+%description -n librbd-crimson-devel
+Headers and development files for librbd-crimson
+
+%files -n librbd-crimson1
+%{_libdir}/librbd_crimson.so.*
+
+%post -n librbd-crimson1 -p /sbin/ldconfig
+
+%postun -n librbd-crimson1 -p /sbin/ldconfig
+
+%files -n librbd-crimson-devel
+%{_includedir}/rbd/rbd_crimson.h
+%{_libdir}/librbd_crimson.so
+%endif
+
 %files -n librgw2
 %{_libdir}/librgw.so.*
 %if %{with lttng}
index d1236a41c95e36982c70f70376aa40cf76392747..4f1c48838810bcaf05acbd63c118c041fa9a7f32 100644 (file)
@@ -207,4 +207,5 @@ target_link_libraries(crimson
 add_subdirectory(admin)
 add_subdirectory(os)
 add_subdirectory(osd)
+add_subdirectory(rbd)
 add_subdirectory(tools)
diff --git a/src/crimson/rbd/CMakeLists.txt b/src/crimson/rbd/CMakeLists.txt
new file mode 100644 (file)
index 0000000..aba50a6
--- /dev/null
@@ -0,0 +1,47 @@
+# librbd_crimson - Seastar-native RBD client
+
+# Sources: core image/cluster logic at top level, C API shims under api/.
+set(crimson_rbd_srcs
+  utils.cc
+  image_header.cc
+  cluster.cc
+  image.cc
+  completion.cc
+  api/open.cc
+  api/io.cc
+  api/cluster.cc
+  api/metadata.cc
+  api/external_thread.cc
+)
+
+# Build shared lib when ENABLE_SHARED (for RPM packaging); else static
+add_library(librbd_crimson ${CEPH_SHARED} ${crimson_rbd_srcs})
+
+target_include_directories(librbd_crimson
+  PUBLIC
+    ${CMAKE_SOURCE_DIR}/src/include
+    ${CMAKE_SOURCE_DIR}/src
+  PRIVATE
+    ${CMAKE_SOURCE_DIR}/src/cls/rbd
+)
+
+# cls_rbd_client is PRIVATE: only image_header.cc decodes cls/rbd payloads.
+target_link_libraries(librbd_crimson
+  PUBLIC
+    crimson
+    crimson::cflags
+  PRIVATE
+    cls_rbd_client
+)
+
+# Hidden visibility keeps only the exported C API in the shared object;
+# SOVERSION 1 matches the librbd-crimson1 RPM package name.
+if(ENABLE_SHARED)
+  set_target_properties(librbd_crimson PROPERTIES
+    OUTPUT_NAME rbd_crimson
+    VERSION 1.0.0
+    SOVERSION 1
+    CXX_VISIBILITY_PRESET hidden
+    VISIBILITY_INLINES_HIDDEN ON)
+endif()
+
+install(TARGETS librbd_crimson
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/src/crimson/rbd/api/cluster.cc b/src/crimson/rbd/api/cluster.cc
new file mode 100644 (file)
index 0000000..1cc2a78
--- /dev/null
@@ -0,0 +1,309 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+//
+// C API: rbd_crimson_cluster_connect, rbd_crimson_ioctx_create,
+// rbd_crimson_cluster_wait_for_osdmap, rbd_crimson_cluster_get. Phase 6.
+//
+// Must be called from within a Seastar context (e.g. app.run() lambda).
+// Config options: key-value pairs, e.g. config_file, keyring, name, cluster.
+// assume_bootstrapped=1: skip conf/perf start (app already did C++ bootstrap).
+//
+
+#include "crimson/rbd/api/crimson_rbd_impl.h"
+#include "crimson/rbd/api/crimson_rbd_internal.h"
+
+#include <cerrno>
+#include <cstring>
+#include <map>
+#include <mutex>
+#include <string>
+#include <vector>
+
+#include "auth/KeyRing.h"
+#include "common/ceph_argparse.h"
+#include "crimson/client/io_context.h"
+#include "crimson/client/rados_client.h"
+#include "crimson/common/auth_handler.h"
+#include "crimson/common/config_proxy.h"
+#include "crimson/common/log.h"
+#include "crimson/common/perf_counters_collection.h"
+#include "crimson/mon/MonClient.h"
+#include "crimson/net/Messenger.h"
+#include "crimson/osd/main_config_bootstrap_helpers.h"
+#include "crimson/osdc/objecter.h"
+#include <sstream>
+
+#include "msg/msg_types.h"
+#include "rbd/rbd_crimson.h"
+
+namespace {
+
+seastar::logger& logger() {
+  return crimson::get_logger(ceph_subsys_client);
+}
+
+}  // namespace
+
+/// Phase 6.2: shared cluster map (cluster_name -> cluster)
+static std::mutex g_cluster_map_mutex;
+static std::map<std::string, crimson::rbd::crimson_rbd_cluster*> g_cluster_map;
+
+namespace {
+
+/// Parse config_options (key,val,key,val,...,NULL) into map.
+/// Supports: config_file/conf_file, keyring/keyfile, name, cluster.
+///
+/// Two calling conventions are accepted:
+///  - num_config_options > 0: consume up to that many entries as key/value
+///    pairs; a trailing unpaired key is silently dropped.
+///  - num_config_options == 0: treat the array as NULL-terminated and scan
+///    until the first NULL key or value slot.
+/// Later duplicate keys overwrite earlier ones (std::map::operator[]).
+std::map<std::string, std::string>
+parse_config_options(const char* const* config_options, size_t num_config_options)
+{
+  std::map<std::string, std::string> opts;
+  // A NULL array means "no options": return an empty map.
+  if (!config_options) return opts;
+  size_t i = 0;
+  if (num_config_options > 0) {
+    // Counted mode; stop early if a NULL appears before the stated count.
+    while (i + 1 < num_config_options) {
+      const char* k = config_options[i];
+      const char* v = config_options[i + 1];
+      if (!k || !v) break;
+      opts[std::string(k)] = std::string(v);
+      i += 2;
+    }
+  } else {
+    // NULL-terminated mode.
+    while (config_options[i] && config_options[i + 1]) {
+      opts[std::string(config_options[i])] = std::string(config_options[i + 1]);
+      i += 2;
+    }
+  }
+  return opts;
+}
+
+/// Build argv from options for ceph_argparse_early_args and parse_argv.
+std::vector<std::string> build_argv(const std::map<std::string, std::string>& opts)
+{
+  std::vector<std::string> args = {"crimson_rbd"};
+  auto add = [&](const std::string& key, const char* arg) {
+    auto it = opts.find(key);
+    if (it != opts.end() && !it->second.empty()) {
+      args.push_back(arg);
+      args.push_back(it->second);
+    }
+  };
+  add("config_file", "-c");
+  add("conf_file", "-c");
+  add("keyring", "-k");
+  add("keyfile", "-k");
+  add("name", "-n");
+  add("cluster", "--cluster");
+  add("assume_bootstrapped", "");  // no argv; handled separately
+  return args;
+}
+
+}  // namespace
+
+extern "C" {
+
+// Connect a cluster handle: parse options, optionally bootstrap crimson
+// conf/perf, then bring up messenger -> monc -> RadosClient in order.
+// Idempotent: a second call on a connected handle returns 0.
+// Must run inside a Seastar context (all futures are .get()-blocked here,
+// which requires a seastar::thread).
+int rbd_crimson_cluster_connect(rbd_crimson_cluster_t cluster,
+                                const char* const* config_options,
+                                size_t num_config_options)
+{
+  if (!cluster) return -EINVAL;
+  auto* c = static_cast<crimson::rbd::crimson_rbd_cluster*>(cluster);
+  if (c->connected) return 0;  // idempotent
+
+  auto opts = parse_config_options(config_options, num_config_options);
+  // assume_bootstrapped=1: the embedding app already started
+  // sharded_conf/local_conf/perf, so we skip that phase below.
+  const bool assume_bootstrapped =
+    opts.count("assume_bootstrapped") && opts["assume_bootstrapped"] == "1";
+  auto args_str = build_argv(opts);
+  if (!assume_bootstrapped && args_str.size() < 2) {
+    logger().error("rbd_crimson_cluster_connect: need config (e.g. config_file, name)");
+    return -EINVAL;
+  }
+  if (assume_bootstrapped && args_str.size() < 2) {
+    // Fall back to a default entity name so early-arg parsing has one.
+    args_str.push_back("-n");
+    args_str.push_back("client.admin");
+  }
+
+  // ceph_argparse_early_args wants a char* view of the argv strings.
+  std::vector<const char*> args;
+  for (const auto& s : args_str) {
+    args.push_back(s.c_str());
+  }
+
+  std::string cluster_name = "ceph";
+  std::string conf_file_list;
+  auto init_params = ceph_argparse_early_args(
+    args, CEPH_ENTITY_TYPE_CLIENT, &cluster_name, &conf_file_list);
+
+  try {
+    if (!assume_bootstrapped) {
+      // We own conf/perf lifetime; shutdown() tears them back down.
+      c->owns_conf_perf = true;
+      crimson::common::sharded_conf().start(
+        init_params.name, cluster_name).get();
+      crimson::common::local_conf().start().get();
+      crimson::common::sharded_perf_coll().start().get();
+
+      crimson::common::local_conf().parse_config_files(conf_file_list).get();
+      crimson::common::local_conf().parse_env().get();
+      crimson::common::local_conf().parse_argv(args_str).get();
+
+      crimson::osd::populate_config_from_mon().get();
+    }
+
+    // No-op auth handler: this client does not act on peer caps.
+    class CephAuthHandler : public crimson::common::AuthHandler {
+    public:
+      void handle_authentication(const EntityName&, const AuthCapsInfo&) override {}
+    };
+    c->auth_handler = std::make_unique<CephAuthHandler>();
+    c->msgr = crimson::net::Messenger::create(
+      entity_name_t(init_params.name.get_type(), -1),
+      "rbd_crimson",
+      crimson::osd::get_nonce(),
+      true);
+    c->monc = std::make_unique<crimson::mon::Client>(*c->msgr, *c->auth_handler);
+    c->msgr->set_auth_client(c->monc.get());
+    c->msgr->set_auth_server(c->monc.get());
+
+    // Dispatch order: monc first, then the objecter.
+    c->rados = std::make_unique<crimson::client::RadosClient>(*c->msgr, *c->monc);
+    crimson::net::dispatchers_t dispatchers;
+    dispatchers.push_back(c->monc.get());
+    dispatchers.push_back(&c->rados->get_objecter());
+    c->msgr->start(dispatchers).get();
+
+    c->monc->start().get();
+    c->rados->get_objecter().set_client_incarnation(
+      static_cast<int>(crimson::osd::get_nonce() & 0x7fffffff));
+    c->rados->connect().get();
+    c->connected = true;
+    c->cluster_name = cluster_name;
+    // Publish in the shared map so rbd_crimson_cluster_get() can find us.
+    if (!cluster_name.empty()) {
+      std::lock_guard lock(g_cluster_map_mutex);
+      g_cluster_map[cluster_name] = c;
+    }
+    return 0;
+  } catch (const std::system_error& e) {
+    logger().error("rbd_crimson_cluster_connect failed: {}", e.what());
+    return e.code().value() > 0 ? -e.code().value() : e.code().value();
+  } catch (...) {
+    logger().error("rbd_crimson_cluster_connect failed");
+    return -EIO;
+  }
+}
+
+// Tear down a connected cluster handle in reverse order of connect():
+// rados -> monc -> messenger, then drop state and (if we bootstrapped
+// them) stop conf/perf. Best-effort: teardown exceptions are swallowed
+// so cleanup always completes. No-op on a null or unconnected handle.
+void rbd_crimson_cluster_shutdown(rbd_crimson_cluster_t cluster)
+{
+  if (!cluster) return;
+  auto* c = static_cast<crimson::rbd::crimson_rbd_cluster*>(cluster);
+  if (!c->connected) return;
+  try {
+    if (c->rados) c->rados->shutdown().get();
+    if (c->monc) c->monc->stop().get();
+    if (c->msgr) {
+      c->msgr->stop();
+      c->msgr->shutdown().get();
+    }
+  } catch (...) {}
+  c->rados.reset();
+  c->monc.reset();
+  c->msgr = nullptr;
+  c->auth_handler.reset();
+  // Remove ourselves from the shared-cluster map registered by connect().
+  if (!c->cluster_name.empty()) {
+    std::lock_guard lock(g_cluster_map_mutex);
+    g_cluster_map.erase(c->cluster_name);
+  }
+  c->cluster_name.clear();
+  c->connected = false;
+  // Only stop conf/perf if connect() started them (not assume_bootstrapped).
+  if (c->owns_conf_perf) {
+    c->owns_conf_perf = false;
+    try {
+      crimson::common::sharded_perf_coll().stop().get();
+      crimson::common::local_conf().stop().get();
+      crimson::common::sharded_conf().stop().get();
+    } catch (...) {}
+  }
+}
+
+// Look up an already-connected cluster by name in the shared map
+// (populated by rbd_crimson_cluster_connect). Returns a borrowed handle:
+// the caller must not destroy it; -ENOENT if no such cluster is connected.
+int rbd_crimson_cluster_get(const char* cluster_name,
+                            rbd_crimson_cluster_t* cluster)
+{
+  if (!cluster_name || !cluster_name[0] || !cluster) return -EINVAL;
+  std::lock_guard lock(g_cluster_map_mutex);
+  auto it = g_cluster_map.find(cluster_name);
+  if (it == g_cluster_map.end()) return -ENOENT;
+  *cluster = it->second;
+  return 0;
+}
+
+// Return the messenger's bound address vector as a malloc'd string.
+// The caller owns *addrs and must release it with free().
+// Returns -ENOTCONN before connect, -ENOMEM if allocation fails.
+int rbd_crimson_cluster_getaddrs(rbd_crimson_cluster_t cluster, char** addrs)
+{
+  if (!cluster || !addrs) return -EINVAL;
+  auto* c = static_cast<crimson::rbd::crimson_rbd_cluster*>(cluster);
+  if (!c->connected || !c->msgr) return -ENOTCONN;
+  std::ostringstream os;
+  os << c->msgr->get_myaddrs();
+  const std::string s = os.str();
+  *addrs = strdup(s.c_str());
+  // strdup can return NULL under memory pressure; the original returned
+  // 0 with *addrs == NULL, which callers would dereference.
+  if (!*addrs) return -ENOMEM;
+  return 0;
+}
+
+// Create an I/O context bound to `pool_name` on a connected cluster.
+// On success *ioctx owns a new crimson_rbd_ioctx; the caller releases it
+// with the corresponding ioctx destroy call. std::system_error from the
+// pool lookup is mapped to a negative errno; anything else becomes -EIO.
+int rbd_crimson_ioctx_create(rbd_crimson_cluster_t cluster,
+                             const char* pool_name,
+                             rbd_crimson_ioctx_t* ioctx)
+{
+  if (!cluster || !pool_name || !ioctx) return -EINVAL;
+  auto* c = static_cast<crimson::rbd::crimson_rbd_cluster*>(cluster);
+  if (!c->connected || !c->rados) return -ENOTCONN;
+
+  try {
+    // Blocks on the pool-id lookup; requires a seastar::thread context.
+    auto ioc = c->rados->create_ioctx(pool_name).get();
+    *ioctx = new crimson::rbd::crimson_rbd_ioctx(std::move(ioc));
+    return 0;
+  } catch (const std::system_error& e) {
+    return e.code().value() > 0 ? -e.code().value() : e.code().value();
+  } catch (...) {
+    return -EIO;
+  }
+}
+
+// Block until the objecter has received an initial OSD map, so that
+// subsequent I/O can be routed. Requires a connected cluster and a
+// seastar::thread context (the future is .get()-blocked).
+int rbd_crimson_cluster_wait_for_osdmap(rbd_crimson_cluster_t cluster)
+{
+  if (!cluster) return -EINVAL;
+  auto* c = static_cast<crimson::rbd::crimson_rbd_cluster*>(cluster);
+  if (!c->connected || !c->rados) return -ENOTCONN;
+
+  try {
+    c->rados->get_objecter().wait_for_osdmap().get();
+    return 0;
+  } catch (const std::system_error& e) {
+    return e.code().value() > 0 ? -e.code().value() : e.code().value();
+  } catch (...) {
+    return -EIO;
+  }
+}
+
+}  // extern "C"
+
+namespace crimson::rbd {
+
+// Out-of-line so unique_ptr members see complete types (see impl header).
+crimson_rbd_cluster::~crimson_rbd_cluster() = default;
+
+// Allocate an empty, unconnected cluster handle for the C API.
+// Returns -ENOMEM on allocation failure.
+int create_cluster_impl(rbd_crimson_cluster_t* cluster)
+{
+  if (!cluster) return -EINVAL;
+  try {
+    *cluster = new crimson_rbd_cluster();
+    return 0;
+  } catch (...) {
+    return -ENOMEM;
+  }
+}
+
+// Free a handle created by create_cluster_impl. Does NOT shut down an
+// active connection; callers are expected to shutdown first.
+void destroy_cluster_impl(rbd_crimson_cluster_t cluster)
+{
+  delete static_cast<crimson_rbd_cluster*>(cluster);
+}
+
+// Drop the handle from the shared name->cluster map (used by
+// cluster_destroy so stale pointers are never returned by cluster_get).
+void unregister_cluster_from_map(rbd_crimson_cluster_t cluster)
+{
+  auto* c = static_cast<crimson_rbd_cluster*>(cluster);
+  if (!c || c->cluster_name.empty()) return;
+  std::lock_guard lock(g_cluster_map_mutex);
+  g_cluster_map.erase(c->cluster_name);
+}
+
+}  // namespace crimson::rbd
diff --git a/src/crimson/rbd/api/crimson_rbd_impl.h b/src/crimson/rbd/api/crimson_rbd_impl.h
new file mode 100644 (file)
index 0000000..85fec3d
--- /dev/null
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+//
+// Internal structs for C API implementation. Shared by open.cc, io.cc, cluster.cc.
+//
+
+#ifndef CEPH_CRIMSON_RBD_API_CRIMSON_RBD_IMPL_H
+#define CEPH_CRIMSON_RBD_API_CRIMSON_RBD_IMPL_H
+
+#include <memory>
+#include <string>
+
+#include "crimson/client/io_context.h"
+#include "crimson/net/Fwd.h"
+#include "crimson/rbd/image.h"
+
+namespace crimson {
+
+namespace common {
+class AuthHandler;
+}
+namespace mon {
+class Client;
+}
+namespace net {
+class Messenger;
+}
+
+namespace client {
+class RadosClient;
+}
+
+}  // namespace crimson
+
+namespace crimson::rbd {
+
+/// Backing object for the opaque rbd_crimson_ioctx_t C handle: wraps a
+/// single pool-bound IoCtx by value.
+struct crimson_rbd_ioctx {
+  crimson::client::IoCtx ioctx;
+  explicit crimson_rbd_ioctx(crimson::client::IoCtx ioc) : ioctx(std::move(ioc)) {}
+};
+
+/// Backing object for the opaque rbd_image_t C handle: owns the open Image.
+struct crimson_rbd_image {
+  std::unique_ptr<Image> image;
+  explicit crimson_rbd_image(std::unique_ptr<Image> img) : image(std::move(img)) {}
+};
+
+/// Backing object for rbd_crimson_cluster_t: connection state plus the
+/// messenger/monc/rados stack built by rbd_crimson_cluster_connect.
+struct crimson_rbd_cluster {
+  crimson_rbd_cluster() = default;
+  std::string cluster_name;  // Phase 6.2: for shared-cluster map
+  /// Destructor defined in cluster.cc (needs complete types).
+  ~crimson_rbd_cluster();
+
+  bool connected = false;
+  bool owns_conf_perf = false;  // true if we started sharded_conf/sharded_perf_coll
+  std::unique_ptr<crimson::common::AuthHandler> auth_handler;
+  crimson::net::MessengerRef msgr;
+  std::unique_ptr<crimson::mon::Client> monc;
+  std::unique_ptr<crimson::client::RadosClient> rados;
+};
+
+}  // namespace crimson::rbd
+
+#endif
diff --git a/src/crimson/rbd/api/crimson_rbd_internal.h b/src/crimson/rbd/api/crimson_rbd_internal.h
new file mode 100644 (file)
index 0000000..210255f
--- /dev/null
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+//
+// Internal C++ API for integration tests.
+// Creates rbd_crimson_ioctx_t from IoCtx when full cluster C API is not yet available (Phase 6).
+//
+
+#ifndef CEPH_CRIMSON_RBD_API_CRIMSON_RBD_INTERNAL_H
+#define CEPH_CRIMSON_RBD_API_CRIMSON_RBD_INTERNAL_H
+
+#include "rbd/rbd_crimson.h"
+
+namespace crimson::client {
+class IoCtx;
+}
+
+namespace crimson::rbd {
+
+/// Create rbd_crimson_ioctx_t from existing IoCtx. For integration tests when
+/// rbd_crimson_cluster_connect / rbd_crimson_ioctx_create are not yet implemented.
+/// Caller must call rbd_crimson_ioctx_destroy when done.
+rbd_crimson_ioctx_t ioctx_create_from_iocontext(crimson::client::IoCtx ioctx);
+
+/// Open RBD image by id (skips name->id lookup). For integration tests when
+/// get_image_id fails due to auth/layout (crimson mon auth vs vstart).
+int rbd_open_by_id(rbd_crimson_ioctx_t ioctx, const char* image_id,
+                   rbd_image_t* image);
+
+/// Unregister cluster from shared map (called from cluster_destroy).
+void unregister_cluster_from_map(rbd_crimson_cluster_t cluster);
+
+/// Create cluster (must be in cluster.cc for complete types).
+int create_cluster_impl(rbd_crimson_cluster_t* cluster);
+
+/// Destroy cluster (must be in cluster.cc for complete types).
+void destroy_cluster_impl(rbd_crimson_cluster_t cluster);
+
+} // namespace crimson::rbd
+
+#endif
diff --git a/src/crimson/rbd/api/external_thread.cc b/src/crimson/rbd/api/external_thread.cc
new file mode 100644 (file)
index 0000000..19ae9f3
--- /dev/null
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+//
+// SPDX-License-Identifier: BSD-3-Clause
+// Copyright (C) 2025 Red Hat
+//
+// Phase A1: External-thread integration for SPDK reactor.
+// Bridges SPDK reactor loop to Seastar run_one_tick / register APIs.
+// Part of librbd_crimson; SPDK links librbd_crimson, no direct Seastar dep.
+
+#include "include/rbd/rbd_crimson.h"
+
+#include <memory>
+
+#include <seastar/core/app-template.hh>
+#include <seastar/core/reactor.hh>
+#include <seastar/core/smp.hh>
+#include <seastar/util/log.hh>
+
+namespace {
+
+std::unique_ptr<seastar::app_template> g_app;
+
+} // anonymous namespace
+
+extern "C" {
+
+// Build the global app_template in external-thread mode so Seastar shards
+// can live on threads Seastar did not create (SPDK reactors).
+// Call once, before spdk_reactors_start(). Returns 0 on success, -1 on
+// any failure (g_app is left null so later calls can detect it).
+int rbd_crimson_configure_external_threads(unsigned core_count)
+{
+  try {
+    seastar::app_template::seastar_options opts;
+    // SPDK owns CPU pinning and hugepage policy, so disable Seastar's
+    // affinity, mbind and memory locking; quiet logging; leave signal
+    // handling to the embedding application.
+    opts.smp_opts.thread_affinity.set_value(false);
+    opts.smp_opts.mbind.set_value(false);
+    opts.smp_opts.smp.set_value(core_count);
+    opts.smp_opts.lock_memory.set_value(false);
+    opts.log_opts.default_log_level.set_value(seastar::log_level::error);
+    opts.reactor_opts.no_handle_interrupt.set_value(true);
+
+    g_app = std::make_unique<seastar::app_template>(std::move(opts));
+    if (g_app->configure_external_thread_mode(core_count) != 0) {
+      g_app.reset();
+      return -1;
+    }
+    return 0;
+  } catch (...) {
+    return -1;
+  }
+}
+
+// Register a Seastar reactor for `shard_id` on the calling (SPDK) thread.
+// Must follow a successful rbd_crimson_configure_external_threads().
+void rbd_crimson_register_reactor(unsigned shard_id)
+{
+  // Guard against misuse: the original dereferenced g_app unconditionally
+  // and crashed the embedding thread if configuration never happened or
+  // failed. A silent no-op here keeps the SPDK reactor alive.
+  if (!g_app) return;
+  g_app->register_reactor_on_this_thread(shard_id);
+}
+
+// Advance this thread's reactor by one poll iteration.
+// Returns 1 if work was done, 0 if idle.
+// NOTE(review): seastar::engine() assumes rbd_crimson_register_reactor()
+// already ran on this thread — confirm callers uphold that ordering.
+int rbd_crimson_run_one_tick(void)
+{
+  return seastar::engine().run_one_tick() ? 1 : 0;
+}
+
+// Per-thread teardown of the Seastar shard state; call on each SPDK
+// reactor thread before global cleanup.
+void rbd_crimson_reactor_cleanup(void)
+{
+  seastar::smp::cleanup_cpu();
+}
+
+// Global teardown after spdk_reactors_fini: unwind external-thread mode
+// and drop the app_template. Safe to call if configure never succeeded.
+void rbd_crimson_cleanup_all(void)
+{
+  if (g_app) {
+    g_app->cleanup_external_thread_mode();
+    g_app.reset();
+  }
+}
+
+} // extern "C"
diff --git a/src/crimson/rbd/api/io.cc b/src/crimson/rbd/api/io.cc
new file mode 100644 (file)
index 0000000..6154b48
--- /dev/null
@@ -0,0 +1,560 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+//
+// C API: rbd_aio_read, rbd_aio_write, rbd_aio_readv, rbd_aio_writev,
+// rbd_aio_create_completion, rbd_aio_release, rbd_aio_get_arg, rbd_aio_get_return_value.
+//
+
+#include "crimson/rbd/api/crimson_rbd_impl.h"
+
+#include <cerrno>
+#include <cstring>
+#include <sys/uio.h>
+
+#include <memory>
+#include <seastar/core/future.hh>
+#include <seastar/core/sleep.hh>
+#include <seastar/core/when_all.hh>
+
+#include "include/buffer.h"
+#include "rbd/rbd_crimson.h"
+#include "crimson/rbd/completion.h"
+#include "crimson/rbd/utils.h"
+
+namespace crimson::rbd {
+namespace {
+
+void schedule_aio_read(crimson_rbd_image* img, uint64_t off, size_t len,
+                       char* buf, crimson_rbd_completion* c);
+void schedule_aio_readv(crimson_rbd_image* img, uint64_t off, size_t len,
+                        const struct iovec* iov, int iovcnt, crimson_rbd_completion* c);
+void schedule_aio_write(crimson_rbd_image* img, uint64_t off, size_t len,
+                       const char* buf, crimson_rbd_completion* c);
+void schedule_aio_write_with_buf(crimson_rbd_image* img, uint64_t off, size_t len,
+                                 std::shared_ptr<std::vector<char>> buf,
+                                 crimson_rbd_completion* c);
+void schedule_aio_discard(crimson_rbd_image* img, uint64_t off, uint64_t len,
+                          crimson_rbd_completion* c);
+void schedule_aio_flush(crimson_rbd_image* img, crimson_rbd_completion* c);
+void schedule_aio_write_zeroes(crimson_rbd_image* img, uint64_t off, size_t len,
+                               crimson_rbd_completion* c);
+void schedule_aio_compare_and_writev(crimson_rbd_image* img, uint64_t off,
+    const struct iovec* cmp_iov, int cmp_iovcnt,
+    const struct iovec* iov, int iovcnt,
+    crimson_rbd_completion* c, uint64_t* mismatch_off);
+
+} // namespace
+} // namespace crimson::rbd
+
+extern "C" {
+
+using namespace crimson::rbd;
+
+// --- completion API ---
+
+// Allocate a completion; `complete_cb` (may be NULL) fires with `cb_arg`
+// when the I/O finishes. Caller releases with rbd_aio_release().
+int rbd_aio_create_completion(void* cb_arg, rbd_callback_t complete_cb,
+                              rbd_completion_t* c)
+{
+  if (!c) return -EINVAL;
+  try {
+    auto* comp = new crimson_rbd_completion();
+    comp->callback_arg = cb_arg;
+    comp->complete_cb = complete_cb;
+    *c = comp;
+    return 0;
+  } catch (...) {
+    return -ENOMEM;
+  }
+}
+
+// Block until the completion fires, polling its flag every 10us.
+// NOTE(review): seastar::sleep(...).get() requires a seastar::thread
+// context — confirm this is never called from a plain pthread.
+int rbd_aio_wait_for_complete(rbd_completion_t c)
+{
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  if (!comp) return -EINVAL;
+  while (!comp->complete.load(std::memory_order_acquire)) {
+    seastar::sleep(std::chrono::microseconds(10)).get();
+  }
+  return 0;
+}
+
+// Non-blocking poll: 1 if complete, 0 otherwise (or on a NULL handle).
+int rbd_aio_is_complete(rbd_completion_t c)
+{
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  return comp && comp->complete.load(std::memory_order_acquire) ? 1 : 0;
+}
+
+// Result of the operation: bytes transferred, or a negative errno.
+ssize_t rbd_aio_get_return_value(rbd_completion_t c)
+{
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  return comp ? comp->rval.load(std::memory_order_acquire) : -EINVAL;
+}
+
+// Retrieve the cb_arg supplied at creation time.
+void* rbd_aio_get_arg(rbd_completion_t c)
+{
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  return comp ? comp->callback_arg : nullptr;
+}
+
+// Free the completion. The caller must ensure no in-flight I/O still
+// references it (wait_for_complete / is_complete first).
+void rbd_aio_release(rbd_completion_t c)
+{
+  delete static_cast<crimson_rbd_completion*>(c);
+}
+
+// --- aio read / write ---
+
+// Queue an async read of [off, off+len) into `buf`. `buf` must remain
+// valid until the completion fires; the result is the byte count (or a
+// negative errno) via rbd_aio_get_return_value().
+int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len, char* buf,
+                 rbd_completion_t c)
+{
+  if (!image || !buf || !c) return -EINVAL;
+  auto* img = static_cast<crimson_rbd_image*>(image);
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  if (!img->image || !comp) return -EINVAL;
+
+  schedule_aio_read(img, off, len, buf, comp);
+  return 0;
+}
+
+// Queue an async write of `len` bytes from `buf` at `off`. The buffer is
+// copied before return (see schedule_aio_write), so the caller may reuse
+// it immediately.
+int rbd_aio_write(rbd_image_t image, uint64_t off, size_t len, const char* buf,
+                  rbd_completion_t c)
+{
+  if (!image || !buf || !c) return -EINVAL;
+  auto* img = static_cast<crimson_rbd_image*>(image);
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  if (!img->image || !comp) return -EINVAL;
+
+  schedule_aio_write(img, off, len, buf, comp);
+  return 0;
+}
+
+int rbd_aio_readv(rbd_image_t image, const struct iovec* iov, int iovcnt,
+                  uint64_t off, rbd_completion_t c)
+{
+  if (!image || !iov || iovcnt <= 0 || !c) return -EINVAL;
+  size_t total = 0;
+  for (int i = 0; i < iovcnt; ++i) {
+    total += iov[i].iov_len;
+  }
+  if (total == 0) return -EINVAL;
+
+  schedule_aio_readv(static_cast<crimson_rbd_image*>(image), off, total,
+                    iov, iovcnt, static_cast<crimson_rbd_completion*>(c));
+  return 0;
+}
+
+int rbd_aio_writev(rbd_image_t image, const struct iovec* iov, int iovcnt,
+                   uint64_t off, rbd_completion_t c)
+{
+  if (!image || !iov || iovcnt <= 0 || !c) return -EINVAL;
+  size_t total = 0;
+  for (int i = 0; i < iovcnt; ++i) {
+    total += iov[i].iov_len;
+  }
+  if (total == 0) return -EINVAL;
+
+  auto buf = std::make_shared<std::vector<char>>(total);
+  size_t pos = 0;
+  for (int i = 0; i < iovcnt; ++i) {
+    memcpy(buf->data() + pos, iov[i].iov_base, iov[i].iov_len);
+    pos += iov[i].iov_len;
+  }
+  schedule_aio_write_with_buf(static_cast<crimson_rbd_image*>(image), off, total,
+                              buf, static_cast<crimson_rbd_completion*>(c));
+  return 0;
+}
+
+// --- aio discard / flush / write_zeroes (Phase 3) ---
+
+// Queue an async discard (UNMAP) of [off, off+len).
+int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len,
+                    rbd_completion_t c)
+{
+  if (!image || !c) return -EINVAL;
+  auto* img = static_cast<crimson_rbd_image*>(image);
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  if (!img->image || !comp) return -EINVAL;
+
+  schedule_aio_discard(img, off, len, comp);
+  return 0;
+}
+
+// Queue an async flush; completes once outstanding writes are durable.
+int rbd_aio_flush(rbd_image_t image, rbd_completion_t c)
+{
+  if (!image || !c) return -EINVAL;
+  auto* img = static_cast<crimson_rbd_image*>(image);
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  if (!img->image || !comp) return -EINVAL;
+
+  schedule_aio_flush(img, comp);
+  return 0;
+}
+
+// Queue an async WRITE_ZEROES over [off, off+len).
+int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, size_t len,
+                         rbd_completion_t c)
+{
+  if (!image || !c) return -EINVAL;
+  auto* img = static_cast<crimson_rbd_image*>(image);
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  if (!img->image || !comp) return -EINVAL;
+
+  schedule_aio_write_zeroes(img, off, len, comp);
+  return 0;
+}
+
+// Atomic compare-and-write: write `iov` at `off` only if the existing
+// data matches `cmp_iov`; on mismatch the offset is reported via
+// `mismatch_off` (may be NULL).
+#if defined(__linux__)
+int rbd_aio_compare_and_writev(rbd_image_t image, uint64_t off,
+                               const struct iovec* cmp_iov, int cmp_iovcnt,
+                               const struct iovec* iov, int iovcnt,
+                               rbd_completion_t c, uint64_t* mismatch_off)
+{
+  if (!image || !c) return -EINVAL;
+  auto* img = static_cast<crimson_rbd_image*>(image);
+  auto* comp = static_cast<crimson_rbd_completion*>(c);
+  if (!img->image || !comp) return -EINVAL;
+  if (!cmp_iov || cmp_iovcnt < 1 || !iov || iovcnt < 1) return -EINVAL;
+
+  schedule_aio_compare_and_writev(img, off, cmp_iov, cmp_iovcnt,
+                                  iov, iovcnt, comp, mismatch_off);
+  return 0;
+}
+#endif
+
+// Synchronous flush: trivially succeeds because writes go straight to
+// the OSDs (no client-side cache to drain).
+int rbd_flush(rbd_image_t image)
+{
+  if (!image) return -EINVAL;
+  (void)image;
+  return 0;  // No client-side cache; writes go direct to OSD
+}
+
+} // extern "C"
+
+namespace crimson::rbd {
+namespace {
+
+void schedule_aio_read(crimson_rbd_image* img, uint64_t off, size_t len,
+                       char* buf, crimson_rbd_completion* c)
+{
+  c->complete.store(false);  // allow reuse for rbd_aio_wait_for_complete
+  if (len == 0) {
+    c->set_complete(0);
+    return;
+  }
+  if (off + len > img->image->get_size()) {
+    c->set_complete(-EINVAL);
+    return;
+  }
+
+  const auto& image_id = img->image->get_image_id();
+  auto& ioctx = img->image->get_ioctx();
+  uint8_t order = img->image->get_order();
+
+  auto extents = extent_to_object_extents(off, len, order);
+  std::vector<seastar::future<ceph::bufferlist>> futures;
+  for (const auto& ex : extents) {
+    std::string oid = data_object_name(image_id, ex.object_no);
+    futures.push_back(ioctx.read(oid, ex.object_off, ex.length));
+  }
+
+  (void)seastar::when_all_succeed(futures.begin(), futures.end())
+    .then([buf, len, c](std::vector<ceph::bufferlist> results) {
+      size_t offset = 0;
+      for (auto& bl : results) {
+        size_t n = std::min(static_cast<size_t>(bl.length()), len - offset);
+        bl.begin().copy(n, buf + offset);
+        offset += n;
+      }
+      c->set_complete(static_cast<int>(offset));
+    })
+    .handle_exception([c](std::exception_ptr e) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const std::system_error& se) {
+        c->set_complete(se.code().value());
+      } catch (...) {
+        c->set_complete(-EIO);
+      }
+    });
+}
+
+void schedule_aio_readv(crimson_rbd_image* img, uint64_t off, size_t len,
+                        const struct iovec* iov, int iovcnt, crimson_rbd_completion* c)
+{
+  c->complete.store(false);
+  if (len == 0) {
+    c->set_complete(0);
+    return;
+  }
+  if (off + len > img->image->get_size()) {
+    c->set_complete(-EINVAL);
+    return;
+  }
+
+  const auto& image_id = img->image->get_image_id();
+  auto& ioctx = img->image->get_ioctx();
+  uint8_t order = img->image->get_order();
+  auto iov_copy = std::make_shared<std::vector<struct iovec>>(iov, iov + iovcnt);
+
+  auto extents = extent_to_object_extents(off, len, order);
+  std::vector<seastar::future<ceph::bufferlist>> futures;
+  for (const auto& ex : extents) {
+    std::string oid = data_object_name(image_id, ex.object_no);
+    futures.push_back(ioctx.read(oid, ex.object_off, ex.length));
+  }
+
+  (void)seastar::when_all_succeed(futures.begin(), futures.end())
+    .then([c, iov_copy](std::vector<ceph::bufferlist> results) {
+      size_t total_copied = 0;
+      size_t iov_idx = 0;
+      size_t iov_off = 0;
+      for (auto& bl : results) {
+        size_t src_off = 0;
+        size_t remain = bl.length();
+        while (remain > 0 && iov_idx < iov_copy->size()) {
+          size_t space = (*iov_copy)[iov_idx].iov_len - iov_off;
+          size_t n = std::min(remain, space);
+          if (n > 0) {
+            bl.begin().copy(n, static_cast<char*>((*iov_copy)[iov_idx].iov_base) + iov_off);
+            total_copied += n;
+            src_off += n;
+            remain -= n;
+            iov_off += n;
+            if (iov_off >= (*iov_copy)[iov_idx].iov_len) {
+              iov_idx++;
+              iov_off = 0;
+            }
+          }
+        }
+      }
+      c->set_complete(static_cast<int>(total_copied));
+    })
+    .handle_exception([c](std::exception_ptr e) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const std::system_error& se) {
+        c->set_complete(se.code().value());
+      } catch (...) {
+        c->set_complete(-EIO);
+      }
+    });
+}
+
+/// Copy-in write variant: snapshots the caller's buffer and delegates to
+/// schedule_aio_write_with_buf(), which owns the copy for the I/O lifetime.
+void schedule_aio_write(crimson_rbd_image* img, uint64_t off, size_t len,
+                       const char* buf, crimson_rbd_completion* c)
+{
+  c->complete.store(false);
+  auto owned = std::make_shared<std::vector<char>>(buf, buf + len);
+  schedule_aio_write_with_buf(img, off, len, std::move(owned), c);
+}
+
+void schedule_aio_write_with_buf(crimson_rbd_image* img, uint64_t off, size_t len,
+                                 std::shared_ptr<std::vector<char>> buf,
+                                 crimson_rbd_completion* c)
+{
+  c->complete.store(false);
+  if (img->image->is_read_only()) {
+    c->set_complete(-EACCES);
+    return;
+  }
+  if (len == 0) {
+    c->set_complete(0);
+    return;
+  }
+  if (off + len > img->image->get_size()) {
+    c->set_complete(-EINVAL);
+    return;
+  }
+
+  const auto& image_id = img->image->get_image_id();
+  auto& ioctx = img->image->get_ioctx();
+  uint8_t order = img->image->get_order();
+
+  auto extents = extent_to_object_extents(off, len, order);
+  std::vector<seastar::future<>> futures;
+  size_t buf_offset = 0;
+  for (const auto& ex : extents) {
+    ceph::bufferlist bl;
+    bl.append(buf->data() + buf_offset, ex.length);
+    buf_offset += ex.length;
+    std::string oid = data_object_name(image_id, ex.object_no);
+    futures.push_back(ioctx.write(oid, ex.object_off, std::move(bl)));
+  }
+
+  (void)seastar::when_all_succeed(futures.begin(), futures.end())
+    .then([c]() { c->set_complete(0); })
+    .handle_exception([c](std::exception_ptr e) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const std::system_error& se) {
+        c->set_complete(se.code().value());
+      } catch (...) {
+        c->set_complete(-EIO);
+      }
+    });
+}
+
+void schedule_aio_discard(crimson_rbd_image* img, uint64_t off, uint64_t len,
+                          crimson_rbd_completion* c)
+{
+  c->complete.store(false);
+  if (img->image->is_read_only()) {
+    c->set_complete(-EACCES);
+    return;
+  }
+  if (len == 0) {
+    c->set_complete(0);
+    return;
+  }
+  if (off + len > img->image->get_size()) {
+    c->set_complete(-EINVAL);
+    return;
+  }
+
+  const auto& image_id = img->image->get_image_id();
+  auto& ioctx = img->image->get_ioctx();
+  uint8_t order = img->image->get_order();
+
+  auto extents = extent_to_object_extents(off, len, order);
+  std::vector<seastar::future<>> futures;
+  for (const auto& ex : extents) {
+    std::string oid = data_object_name(image_id, ex.object_no);
+    futures.push_back(ioctx.discard(oid, ex.object_off, ex.length));
+  }
+
+  (void)seastar::when_all_succeed(futures.begin(), futures.end())
+    .then([c]() { c->set_complete(0); })
+    .handle_exception([c](std::exception_ptr e) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const std::system_error& se) {
+        c->set_complete(se.code().value());
+      } catch (...) {
+        c->set_complete(-EIO);
+      }
+    });
+}
+
+/// Flush: this client keeps no write-back cache (every write is submitted to
+/// the OSD layer directly), so there is nothing to flush — complete
+/// immediately with success.
+void schedule_aio_flush(crimson_rbd_image* img, crimson_rbd_completion* c)
+{
+  (void)img;
+  c->set_complete(0);  // No client-side cache; no-op
+}
+
+void schedule_aio_write_zeroes(crimson_rbd_image* img, uint64_t off, size_t len,
+                               crimson_rbd_completion* c)
+{
+  c->complete.store(false);
+  if (img->image->is_read_only()) {
+    c->set_complete(-EACCES);
+    return;
+  }
+  if (len == 0) {
+    c->set_complete(0);
+    return;
+  }
+  if (off + len > img->image->get_size()) {
+    c->set_complete(-EINVAL);
+    return;
+  }
+
+  const auto& image_id = img->image->get_image_id();
+  auto& ioctx = img->image->get_ioctx();
+  uint8_t order = img->image->get_order();
+
+  auto extents = extent_to_object_extents(off, len, order);
+  std::vector<seastar::future<>> futures;
+  for (const auto& ex : extents) {
+    std::string oid = data_object_name(image_id, ex.object_no);
+    futures.push_back(ioctx.write_zeroes(oid, ex.object_off, ex.length));
+  }
+
+  (void)seastar::when_all_succeed(futures.begin(), futures.end())
+    .then([c]() { c->set_complete(0); })
+    .handle_exception([c](std::exception_ptr e) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const std::system_error& se) {
+        c->set_complete(se.code().value());
+      } catch (...) {
+        c->set_complete(-EIO);
+      }
+    });
+}
+
+/// Copy len bytes from iovecs starting at byte offset start into bufferlist.
+static ceph::bufferlist iovec_to_bufferlist(
+    const struct iovec* iov, int iovcnt, size_t start, size_t len)
+{
+  ceph::bufferlist bl;
+  size_t skip = start;
+  size_t remaining = len;
+  for (int i = 0; i < iovcnt && remaining > 0; ++i) {
+    size_t iov_len = iov[i].iov_len;
+    if (skip >= iov_len) {
+      skip -= iov_len;
+      continue;
+    }
+    size_t src_off = skip;
+    skip = 0;
+    size_t to_copy = std::min(iov_len - src_off, remaining);
+    bl.append(static_cast<const char*>(iov[i].iov_base) + src_off,
+              static_cast<unsigned>(to_copy));
+    remaining -= to_copy;
+  }
+  return bl;
+}
+
+/// Schedule an async compare-and-write: atomically (per object) compare the
+/// on-disk bytes against cmp_iov and, when equal, write iov in their place.
+/// cmp and write payloads must be the same non-zero length. Completes c with
+/// 0 on success, -EILSEQ on compare mismatch, or another negative errno.
+/// NOTE(review): mismatch_off is captured and written asynchronously — the
+/// caller's pointer must stay valid until the completion fires; confirm the
+/// C wrapper guarantees this.
+void schedule_aio_compare_and_writev(crimson_rbd_image* img, uint64_t off,
+    const struct iovec* cmp_iov, int cmp_iovcnt,
+    const struct iovec* iov, int iovcnt,
+    crimson_rbd_completion* c, uint64_t* mismatch_off)
+{
+  c->complete.store(false);
+  if (img->image->is_read_only()) {
+    c->set_complete(-EACCES);
+    return;
+  }
+  // Both payloads must have the same total length, and be non-empty.
+  size_t cmp_len = 0, write_len = 0;
+  for (int i = 0; i < cmp_iovcnt; ++i) cmp_len += cmp_iov[i].iov_len;
+  for (int i = 0; i < iovcnt; ++i) write_len += iov[i].iov_len;
+  if (cmp_len != write_len || cmp_len == 0) {
+    c->set_complete(-EINVAL);
+    return;
+  }
+  size_t len = cmp_len;
+  if (off + len > img->image->get_size()) {
+    c->set_complete(-EINVAL);
+    return;
+  }
+
+  const auto& image_id = img->image->get_image_id();
+  auto& ioctx = img->image->get_ioctx();
+  uint8_t order = img->image->get_order();
+  auto extents = extent_to_object_extents(off, len, order);
+
+  // Slice both iovec arrays per object extent; image_off tracks how far into
+  // the payloads the current extent begins.
+  std::vector<seastar::future<>> futures;
+  size_t image_off = 0;
+  for (const auto& ex : extents) {
+    ceph::bufferlist cmp_bl = iovec_to_bufferlist(
+        cmp_iov, cmp_iovcnt, image_off, ex.length);
+    ceph::bufferlist write_bl = iovec_to_bufferlist(
+        iov, iovcnt, image_off, ex.length);
+    std::string oid = data_object_name(image_id, ex.object_no);
+    futures.push_back(ioctx.compare_and_write(oid, ex.object_off,
+        std::move(cmp_bl), ex.object_off, std::move(write_bl)));
+    image_off += ex.length;
+  }
+
+  (void)seastar::when_all_succeed(futures.begin(), futures.end())
+    .then([c]() { c->set_complete(0); })
+    .handle_exception([c, mismatch_off](std::exception_ptr e) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const std::system_error& se) {
+        int err = se.code().value();
+        // On compare mismatch only a placeholder offset (0) is reported; the
+        // exact mismatch offset is not propagated from the OSD here.
+        // NOTE(review): assumes the error code arrives as positive EILSEQ —
+        // a negative-coded exception would skip this branch; verify.
+        if (err == EILSEQ && mismatch_off) *mismatch_off = 0;
+        c->set_complete(err > 0 ? -err : err);
+      } catch (...) {
+        c->set_complete(-EIO);
+      }
+    });
+}
+
+} // namespace
+} // namespace crimson::rbd
diff --git a/src/crimson/rbd/api/metadata.cc b/src/crimson/rbd/api/metadata.cc
new file mode 100644 (file)
index 0000000..f6006b4
--- /dev/null
@@ -0,0 +1,98 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+//
+// Phase 4: rbd_metadata_get, rbd_metadata_set, rbd_resize
+//
+
+#include "crimson/rbd/api/crimson_rbd_impl.h"
+
+#include <cerrno>
+#include <cstring>
+
+#include "crimson/rbd/image_header.h"
+#include "crimson/rbd/utils.h"
+#include "rbd/rbd_crimson.h"
+
+extern "C" {
+
+int rbd_metadata_get(rbd_image_t image, const char* key, char* value, size_t* val_len)
+{
+  if (!image || !key || !val_len) return -EINVAL;
+  auto* img = static_cast<crimson::rbd::crimson_rbd_image*>(image);
+  if (!img || !img->image) return -EINVAL;
+
+  try {
+    std::string header_oid = crimson::rbd::header_name(img->image->get_image_id());
+    auto val = crimson::rbd::metadata_get(
+      img->image->get_ioctx(), header_oid, key).get();
+    if (*val_len < val.size() + 1) {
+      *val_len = val.size() + 1;
+      return -ERANGE;
+    }
+    if (value) {
+      memcpy(value, val.c_str(), val.size() + 1);
+    }
+    return 0;
+  } catch (const std::system_error& e) {
+    return e.code().value() > 0 ? -e.code().value() : e.code().value();
+  } catch (...) {
+    return -EIO;
+  }
+}
+
+int rbd_metadata_set(rbd_image_t image, const char* key, const char* value)
+{
+  if (!image || !key || !value) return -EINVAL;
+  auto* img = static_cast<crimson::rbd::crimson_rbd_image*>(image);
+  if (!img || !img->image) return -EINVAL;
+  if (img->image->is_read_only()) return -EROFS;
+
+  try {
+    std::string header_oid = crimson::rbd::header_name(img->image->get_image_id());
+    crimson::rbd::metadata_set(
+      img->image->get_ioctx(), header_oid, key, value).get();
+    return 0;
+  } catch (const std::system_error& e) {
+    return e.code().value() > 0 ? -e.code().value() : e.code().value();
+  } catch (...) {
+    return -EIO;
+  }
+}
+
+int rbd_resize(rbd_image_t image, uint64_t size)
+{
+  if (!image) return -EINVAL;
+  auto* img = static_cast<crimson::rbd::crimson_rbd_image*>(image);
+  if (!img || !img->image) return -EINVAL;
+  if (img->image->is_read_only()) return -EROFS;
+
+  try {
+    std::string header_oid = crimson::rbd::header_name(img->image->get_image_id());
+    crimson::rbd::set_size(
+      img->image->get_ioctx(), header_oid, size).get();
+    img->image->set_size(size);
+    return 0;
+  } catch (const std::system_error& e) {
+    return e.code().value() > 0 ? -e.code().value() : e.code().value();
+  } catch (...) {
+    return -EIO;
+  }
+}
+
+/// Phase 7 will wire this to an OSD watch on the header object; until then
+/// report "not implemented".
+int rbd_update_watch(rbd_image_t image, uint64_t* handle,
+                     void (*callback)(void*), void* arg)
+{
+  static_cast<void>(image);
+  static_cast<void>(handle);
+  static_cast<void>(callback);
+  static_cast<void>(arg);
+  return -ENOSYS;
+}
+
+/// Counterpart of rbd_update_watch; stubbed until Phase 7 implements
+/// OSD watch/notify.
+int rbd_update_unwatch(rbd_image_t image, uint64_t handle)
+{
+  static_cast<void>(image);
+  static_cast<void>(handle);
+  return -ENOSYS;
+}
+
+}  // extern "C"
diff --git a/src/crimson/rbd/api/open.cc b/src/crimson/rbd/api/open.cc
new file mode 100644 (file)
index 0000000..24383a3
--- /dev/null
@@ -0,0 +1,179 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+//
+// C API implementations: rbd_open, rbd_close, rbd_stat, rbd_get_size
+// and cluster/ioctx stubs (Phase 6 for full cluster bootstrap).
+//
+
+#include "crimson/rbd/api/crimson_rbd_impl.h"
+#include "crimson/rbd/api/crimson_rbd_internal.h"
+
+#include <cerrno>
+#include <cstring>
+#include <memory>
+#include <string>
+
+#include "rbd/rbd_crimson.h"
+#include "include/types.h"
+#include "crimson/rbd/image_header.h"
+#include "crimson/rbd/utils.h"
+
+extern "C" {
+
+using namespace crimson::rbd;
+
+// --- cluster (stub for Phase 1; Phase 6 will implement full bootstrap) ---
+
+/// Allocate a cluster handle (no connection is made here; connect happens
+/// later via rbd_crimson_cluster_connect). Delegates to the shared impl
+/// helper; returns its status code.
+int rbd_crimson_cluster_create(rbd_crimson_cluster_t* cluster)
+{
+  return crimson::rbd::create_cluster_impl(cluster);
+}
+
+/// Tear down a cluster handle. Order matters: shut the connection down
+/// first, remove the handle from the cluster map, then free the impl object.
+/// Safe to call with a null handle.
+void rbd_crimson_cluster_destroy(rbd_crimson_cluster_t cluster)
+{
+  if (!cluster) return;
+  rbd_crimson_cluster_shutdown(cluster);
+  crimson::rbd::unregister_cluster_from_map(cluster);
+  crimson::rbd::destroy_cluster_impl(cluster);
+}
+
+// rbd_crimson_cluster_connect, rbd_crimson_cluster_shutdown,
+// rbd_crimson_cluster_wait_for_osdmap, rbd_crimson_ioctx_create:
+// implemented in api/cluster.cc
+
+/// Free an ioctx handle. delete on a null pointer is a no-op, so no guard
+/// is needed.
+void rbd_crimson_ioctx_destroy(rbd_crimson_ioctx_t ioctx)
+{
+  delete static_cast<crimson_rbd_ioctx*>(ioctx);
+}
+
+// --- rbd_open / rbd_close / rbd_stat / rbd_get_size (Phase 1) ---
+
+static int do_open(crimson_rbd_ioctx* ioc, const char* name,
+                   rbd_image_t* image, bool read_only)
+{
+  if (!ioc || !name || !image) return -EINVAL;
+  if (!name[0]) return -EINVAL;
+
+  try {
+    auto img_id_fut = get_image_id(ioc->ioctx, name);
+    auto img_id = img_id_fut.get();
+    if (img_id.empty()) return -ENOENT;
+
+    std::string header_oid = header_name(img_id);
+    uint64_t snap_id = CEPH_NOSNAP;  // Phase 1: no snapshots
+
+    auto size_fut = crimson::rbd::get_size(ioc->ioctx, header_oid, snap_id);
+    auto [size, order] = size_fut.get();
+
+    auto feat_fut = crimson::rbd::get_features(ioc->ioctx, header_oid, read_only);
+    auto [features, incompatible] = feat_fut.get();
+    (void)incompatible;
+
+    auto img = std::make_unique<Image>(
+      ioc->ioctx,  // copy; ioctx remains valid for future opens
+      std::move(img_id),
+      size,
+      order,
+      features,
+      read_only);
+    *image = new crimson_rbd_image(std::move(img));
+    return 0;
+  } catch (const std::system_error& e) {
+    int err = e.code().value();
+    return err > 0 ? -err : err;  // C API uses negative errno on error
+  } catch (...) {
+    return -EIO;
+  }
+}
+
+/// Open an image read/write by name. snap_name is ignored (Phase 1: no
+/// snapshots).
+int rbd_open(rbd_crimson_ioctx_t ioctx, const char* name,
+             rbd_image_t* image, const char* snap_name)
+{
+  (void)snap_name;
+  auto* ioc = static_cast<crimson_rbd_ioctx*>(ioctx);
+  return do_open(ioc, name, image, false);
+}
+
+/// Open an image read-only by name. snap_name is ignored (Phase 1: no
+/// snapshots).
+int rbd_open_read_only(rbd_crimson_ioctx_t ioctx, const char* name,
+                      rbd_image_t* image, const char* snap_name)
+{
+  (void)snap_name;
+  auto* ioc = static_cast<crimson_rbd_ioctx*>(ioctx);
+  return do_open(ioc, name, image, true);
+}
+
+static int do_open_by_id(crimson_rbd_ioctx* ioc, const char* image_id,
+                         rbd_image_t* image, bool read_only)
+{
+  if (!ioc || !image_id || !image || !image_id[0]) return -EINVAL;
+  std::string img_id(image_id);
+
+  try {
+    std::string header_oid = header_name(img_id);
+    uint64_t snap_id = CEPH_NOSNAP;
+    auto size_fut = crimson::rbd::get_size(ioc->ioctx, header_oid, snap_id);
+    auto [size, order] = size_fut.get();  // throws if cls exec fails
+    auto feat_fut = crimson::rbd::get_features(ioc->ioctx, header_oid, read_only);
+    auto [features, incompatible] = feat_fut.get();
+    (void)incompatible;
+    auto img = std::make_unique<Image>(
+      ioc->ioctx, std::move(img_id), size, order, features, read_only);
+    *image = new crimson_rbd_image(std::move(img));
+    return 0;
+  } catch (const std::system_error& e) {
+    int err = e.code().value();
+    return err > 0 ? -err : err;
+  } catch (...) {
+    return -EIO;
+  }
+}
+
+/// Close an image handle and release it. Returns -EINVAL on a null handle.
+int rbd_close(rbd_image_t image)
+{
+  auto* img = static_cast<crimson_rbd_image*>(image);
+  if (img == nullptr) return -EINVAL;
+  delete img;
+  return 0;
+}
+
+int rbd_stat(rbd_image_t image, rbd_image_info_t* info, size_t infosize)
+{
+  if (!image || !info) return -EINVAL;
+  auto* img = static_cast<crimson_rbd_image*>(image);
+  if (!img->image) return -EINVAL;
+
+  if (infosize < sizeof(rbd_image_info_t)) return -ERANGE;
+
+  std::memset(info, 0, sizeof(*info));
+  info->size = img->image->get_size();
+  uint64_t obj_size = 1ULL << img->image->get_order();
+  info->obj_size = obj_size;
+  info->num_objs = (img->image->get_size() + obj_size - 1) / obj_size;
+  info->order = static_cast<int>(img->image->get_order());
+  return 0;
+}
+
+/// Report the image size in bytes via *size. Returns 0 or -EINVAL.
+int rbd_get_size(rbd_image_t image, uint64_t* size)
+{
+  auto* img = static_cast<crimson_rbd_image*>(image);
+  if (img == nullptr || size == nullptr || !img->image) return -EINVAL;
+  *size = img->image->get_size();
+  return 0;
+}
+
+} // extern "C"
+
+// --- Internal C++ API for integration tests (outside extern "C") ---
+
+namespace crimson::rbd {
+
+/// Wrap an already-constructed IoCtx in the C-API handle type; used by
+/// integration tests that bootstrap the cluster from C++ directly.
+rbd_crimson_ioctx_t ioctx_create_from_iocontext(crimson::client::IoCtx ioctx)
+{
+  return new crimson_rbd_ioctx(std::move(ioctx));
+}
+
+/// Open an image read/write directly by its id, bypassing the name -> id
+/// directory lookup.
+int rbd_open_by_id(rbd_crimson_ioctx_t ioctx, const char* image_id,
+                   rbd_image_t* image)
+{
+  return do_open_by_id(static_cast<crimson_rbd_ioctx*>(ioctx),
+                       image_id, image, false);
+}
+
+} // namespace crimson::rbd
diff --git a/src/crimson/rbd/cluster.cc b/src/crimson/rbd/cluster.cc
new file mode 100644 (file)
index 0000000..0e226d7
--- /dev/null
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#include "crimson/rbd/cluster.h"
+
+#include "crimson/client/rados_client.h"
+#include "crimson/common/log.h"
+#include "crimson/mon/MonClient.h"
+#include "crimson/net/Messenger.h"
+
+namespace crimson::rbd {
+
+namespace {
+// Log under the "client" subsystem, like the rest of the crimson client code.
+seastar::logger& logger() {
+  return crimson::get_logger(ceph_subsys_client);
+}
+}
+
+/// The Cluster keeps only references to the caller-owned Messenger and
+/// MonClient; the RadosClient it builds from them is owned here.
+Cluster::Cluster(crimson::net::Messenger& msgr,
+                 crimson::mon::Client& monc)
+  : msgr(msgr),
+    monc(monc),
+    rados(std::make_unique<crimson::client::RadosClient>(msgr, monc))
+{}
+
+Cluster::~Cluster() = default;
+
+/// Establish the cluster connection through the underlying RadosClient.
+seastar::future<> Cluster::connect()
+{
+  logger().debug("Cluster::connect");
+  return rados->connect();
+}
+
+/// Create an IoCtx bound to the named pool.
+seastar::future<crimson::client::IoCtx> Cluster::create_ioctx(
+  std::string_view pool_name)
+{
+  return rados->create_ioctx(pool_name);
+}
+
+/// Shut the RadosClient down; the Cluster can be destroyed afterwards.
+seastar::future<> Cluster::shutdown()
+{
+  logger().debug("Cluster::shutdown");
+  return rados->shutdown();
+}
+
+} // namespace crimson::rbd
diff --git a/src/crimson/rbd/cluster.h b/src/crimson/rbd/cluster.h
new file mode 100644 (file)
index 0000000..e90101b
--- /dev/null
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#ifndef CEPH_CRIMSON_RBD_CLUSTER_H
+#define CEPH_CRIMSON_RBD_CLUSTER_H
+
+#include <memory>
+#include <string_view>
+
+#include <seastar/core/future.hh>
+
+#include "crimson/client/io_context.h"
+#include "crimson/client/rados_client.h"
+
+namespace crimson::mon {
+class Client;
+}
+namespace crimson::net {
+class Messenger;
+}
+
+namespace crimson::rbd {
+
+/**
+ * Cluster - wraps Crimson RadosClient.
+ * Caller owns Messenger and MonClient; Cluster holds RadosClient.
+ */
+class Cluster {
+public:
+  /// References must outlive the Cluster; the RadosClient is constructed
+  /// from them immediately.
+  Cluster(crimson::net::Messenger& msgr,
+          crimson::mon::Client& monc);
+  ~Cluster();
+
+  /// Connect the underlying RadosClient to the cluster.
+  seastar::future<> connect();
+  /// Create an IoCtx bound to the named pool.
+  seastar::future<crimson::client::IoCtx> create_ioctx(std::string_view pool_name);
+  /// Shut down the RadosClient; call before destruction.
+  seastar::future<> shutdown();
+
+private:
+  crimson::net::Messenger& msgr;   // caller-owned
+  crimson::mon::Client& monc;      // caller-owned
+  std::unique_ptr<crimson::client::RadosClient> rados;  // owned here
+};
+
+} // namespace crimson::rbd
+
+#endif
diff --git a/src/crimson/rbd/completion.cc b/src/crimson/rbd/completion.cc
new file mode 100644 (file)
index 0000000..c85310f
--- /dev/null
@@ -0,0 +1,9 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#include "crimson/rbd/completion.h"
+
+/*
+ * Phase 1 introduced Completion as a minimal stub; the async completion
+ * entry points (rbd_aio_create_completion, rbd_aio_release,
+ * rbd_aio_get_arg, rbd_aio_get_return_value) are added as part of the
+ * Phase 2 async I/O work.
+ */
diff --git a/src/crimson/rbd/completion.h b/src/crimson/rbd/completion.h
new file mode 100644 (file)
index 0000000..f00de08
--- /dev/null
@@ -0,0 +1,32 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#ifndef CEPH_CRIMSON_RBD_COMPLETION_H
+#define CEPH_CRIMSON_RBD_COMPLETION_H
+
+#include "rbd/rbd_crimson.h"
+
+#include <atomic>
+#include <functional>
+
+/**
+ * Internal completion structure. rbd_completion_t (void*) points to this.
+ * Used by rbd_aio_* for async I/O callbacks.
+ */
+struct crimson_rbd_completion {
+  void* callback_arg = nullptr;          // opaque user pointer passed to complete_cb
+  rbd_callback_t complete_cb = nullptr;  // optional user callback, fired once on completion
+  std::atomic<int> rval{0};              // result code; valid once complete is true
+  std::atomic<bool> complete{false};     // stored after rval (default seq_cst orders the pair)
+
+  crimson_rbd_completion() = default;
+
+  // Publish the result: rval is stored before complete so a poller that
+  // observes complete == true also sees the final rval; the user callback,
+  // if any, then runs on the completing thread.
+  void set_complete(int ret) {
+    rval.store(ret);
+    complete.store(true);
+    if (complete_cb) {
+      complete_cb(static_cast<rbd_completion_t>(this), callback_arg);
+    }
+  }
+};
+
+#endif
diff --git a/src/crimson/rbd/image.cc b/src/crimson/rbd/image.cc
new file mode 100644 (file)
index 0000000..9bf9d95
--- /dev/null
@@ -0,0 +1,23 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#include "crimson/rbd/image.h"
+
+#include "crimson/client/io_context.h"
+
+namespace crimson::rbd {
+
+/// Construct an image handle from values already read from the image header.
+/// The IoCtx is taken by value (a copy), so the handle is independent of the
+/// caller's context lifetime.
+Image::Image(crimson::client::IoCtx ioctx,
+             std::string image_id,
+             uint64_t size,
+             uint8_t order,
+             uint64_t features,
+             bool read_only)
+  : ioctx(std::move(ioctx)),
+    image_id(std::move(image_id)),
+    size(size),
+    order(order),
+    features(features),
+    read_only(read_only)
+{}
+
+} // namespace crimson::rbd
diff --git a/src/crimson/rbd/image.h b/src/crimson/rbd/image.h
new file mode 100644 (file)
index 0000000..72fcdd8
--- /dev/null
@@ -0,0 +1,46 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#ifndef CEPH_CRIMSON_RBD_IMAGE_H
+#define CEPH_CRIMSON_RBD_IMAGE_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "crimson/client/io_context.h"
+
+namespace crimson::rbd {
+
+/**
+ * Image - RBD image handle.
+ * Holds image_id, size, order, features. Phase 1: read-only, no I/O.
+ */
+class Image {
+public:
+  /// All values come from the image header at open time; ioctx is copied in.
+  Image(crimson::client::IoCtx ioctx,
+        std::string image_id,
+        uint64_t size,
+        uint8_t order,
+        uint64_t features,
+        bool read_only);
+
+  const std::string& get_image_id() const { return image_id; }
+  uint64_t get_size() const { return size; }
+  void set_size(uint64_t s) { size = s; }  // Phase 4: after resize
+  uint8_t get_order() const { return order; }
+  uint64_t get_features() const { return features; }
+  bool is_read_only() const { return read_only; }
+  crimson::client::IoCtx& get_ioctx() { return ioctx; }
+
+private:
+  crimson::client::IoCtx ioctx;  // owned copy; independent of the opener's ctx
+  std::string image_id;          // immutable after construction
+  uint64_t size;                 // bytes; updated by set_size() on resize
+  uint8_t order;                 // object size is 1 << order
+  uint64_t features;             // feature bits from the header
+  bool read_only;                // write paths reject when true
+};
+
+} // namespace crimson::rbd
+
+#endif
diff --git a/src/crimson/rbd/image_header.cc b/src/crimson/rbd/image_header.cc
new file mode 100644 (file)
index 0000000..248ccc9
--- /dev/null
@@ -0,0 +1,157 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#include "crimson/rbd/image_header.h"
+
+#include <map>
+
+#include "crimson/client/io_context.h"
+#include "crimson/rbd/utils.h"
+#include "include/rbd_types.h"
+#include "common/snap_types.h"
+#include "include/encoding.h"
+
+namespace crimson::rbd {
+
+seastar::future<std::pair<uint64_t, uint8_t>> get_size(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  uint64_t snap_id)
+{
+  ceph::bufferlist in;
+  ceph::encode(static_cast<snapid_t>(snap_id), in);
+
+  return ioctx.exec(header_oid, "rbd", "get_size", std::move(in))
+    .then([](ceph::bufferlist out) {
+      uint8_t order = 0;
+      uint64_t size = 0;
+      try {
+        auto p = out.cbegin();
+        ceph::decode(order, p);
+        ceph::decode(size, p);
+      } catch (const ceph::buffer::error&) {
+        return seastar::make_exception_future<std::pair<uint64_t, uint8_t>>(
+          std::system_error(EIO, std::generic_category(), "get_size decode"));
+      }
+      return seastar::make_ready_future<std::pair<uint64_t, uint8_t>>(
+        std::make_pair(size, order));
+    });
+}
+
+/// Fetch (features, incompatible) from the header object via cls_rbd
+/// "get_features". A malformed reply resolves with EIO.
+seastar::future<std::pair<uint64_t, uint64_t>> get_features(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  bool read_only)
+{
+  // NOTE(review): the request encodes (snap_id = CEPH_NOSNAP, read_only) —
+  // confirm this matches the cls_rbd get_features input format for the
+  // targeted cls version.
+  ceph::bufferlist in;
+  ceph::encode(static_cast<uint64_t>(CEPH_NOSNAP), in);
+  ceph::encode(read_only, in);
+
+  return ioctx.exec(header_oid, "rbd", "get_features", std::move(in))
+    .then([](ceph::bufferlist out) {
+      uint64_t features = 0;
+      uint64_t incompatible = 0;
+      try {
+        auto p = out.cbegin();
+        // Reply carries the full feature mask, then the incompatible subset.
+        ceph::decode(features, p);
+        ceph::decode(incompatible, p);
+      } catch (const ceph::buffer::error&) {
+        return seastar::make_exception_future<std::pair<uint64_t, uint64_t>>(
+          std::system_error(EIO, std::generic_category(), "get_features decode"));
+      }
+      return seastar::make_ready_future<std::pair<uint64_t, uint64_t>>(
+        std::make_pair(features, incompatible));
+    });
+}
+
+namespace {
+/// Decode a string image id from a cls reply. A decode failure is mapped to
+/// ENOENT so both id-lookup paths (dir_get_id and get_id) report "image not
+/// found" uniformly on an unparsable reply.
+seastar::future<std::string> decode_image_id(ceph::bufferlist out) {
+  std::string id;
+  try {
+    auto p = out.cbegin();
+    ceph::decode(id, p);
+  } catch (const ceph::buffer::error&) {
+    return seastar::make_exception_future<std::string>(
+      std::system_error(ENOENT, std::generic_category(), "get_image_id"));
+  }
+  return seastar::make_ready_future<std::string>(std::move(id));
+}
+}  // namespace
+
+seastar::future<std::string> get_image_id(
+  crimson::client::IoCtx& ioctx,
+  const std::string& image_name)
+{
+  // Prefer dir_get_id on rbd_directory (same as librbd). Fall back to
+  // get_id on rbd_id.<name> when dir_get_id returns ENOENT (e.g. minimal
+  // vstart setups or alternate layout).
+  ceph::bufferlist in;
+  ceph::encode(image_name, in);
+  return ioctx.exec(RBD_DIRECTORY, "rbd", "dir_get_id", std::move(in))
+    .then([](ceph::bufferlist out) { return decode_image_id(std::move(out)); })
+    .handle_exception([&ioctx, image_name](std::exception_ptr e) {
+      try {
+        std::rethrow_exception(e);
+      } catch (const std::system_error& err) {
+        if (err.code().value() != static_cast<int>(ENOENT)) {
+          return seastar::make_exception_future<std::string>(e);
+        }
+      } catch (...) {
+        return seastar::make_exception_future<std::string>(e);
+      }
+      const std::string id_oid = std::string(RBD_ID_PREFIX) + image_name;
+      ceph::bufferlist fallback_in;
+  return ioctx.exec(id_oid, "rbd", "get_id", std::move(fallback_in))
+    .then([](ceph::bufferlist out) { return decode_image_id(std::move(out)); });
+    });
+}
+
+/// Read one metadata value from the header object via cls_rbd
+/// "metadata_get". The future resolves with the value, or fails with a
+/// std::system_error.
+seastar::future<std::string> metadata_get(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  const std::string& key)
+{
+  ceph::bufferlist in;
+  ceph::encode(key, in);
+  return ioctx.exec(header_oid, "rbd", "metadata_get", std::move(in))
+    .then([](ceph::bufferlist out) {
+      std::string value;
+      try {
+        auto p = out.cbegin();
+        ceph::decode(value, p);
+      } catch (const ceph::buffer::error&) {
+        // NOTE(review): a decode failure (corrupt reply) is reported as
+        // ENOENT — indistinguishable from "key missing" to the caller;
+        // consider EIO for the corrupt-reply case.
+        return seastar::make_exception_future<std::string>(
+          std::system_error(ENOENT, std::generic_category(), "metadata_get"));
+      }
+      return seastar::make_ready_future<std::string>(std::move(value));
+    });
+}
+}
+
+/// Store one metadata key/value pair on the header object via cls_rbd
+/// "metadata_set", which takes a map of key -> encoded value.
+seastar::future<> metadata_set(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  const std::string& key,
+  const std::string& value)
+{
+  ceph::bufferlist encoded_value;
+  ceph::encode(value, encoded_value);
+  std::map<std::string, ceph::bufferlist> kv;
+  kv[key] = std::move(encoded_value);
+
+  ceph::bufferlist in;
+  ceph::encode(kv, in);
+  return ioctx.exec(header_oid, "rbd", "metadata_set", std::move(in))
+    .then([](ceph::bufferlist) { return seastar::make_ready_future<>(); });
+}
+
+/// Update the size field in the header object via cls_rbd "set_size". The
+/// reply payload is empty, so it is discarded and the future completes void.
+seastar::future<> set_size(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  uint64_t size)
+{
+  ceph::bufferlist request;
+  ceph::encode(size, request);
+  return ioctx.exec(header_oid, "rbd", "set_size", std::move(request))
+    .then([](ceph::bufferlist) { return seastar::make_ready_future<>(); });
+}
+
+} // namespace crimson::rbd
diff --git a/src/crimson/rbd/image_header.h b/src/crimson/rbd/image_header.h
new file mode 100644 (file)
index 0000000..51a1571
--- /dev/null
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#ifndef CEPH_CRIMSON_RBD_IMAGE_HEADER_H
+#define CEPH_CRIMSON_RBD_IMAGE_HEADER_H
+
+#include <cstdint>
+#include <string>
+
+#include <seastar/core/future.hh>
+
+namespace crimson::client {
+class IoCtx;
+}
+
+namespace crimson::rbd {
+
+/// Read RBD header via cls/rbd. Uses IoCtx::exec for get_size, get_features.
+
+seastar::future<std::pair<uint64_t, uint8_t>> get_size(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  uint64_t snap_id);
+
+seastar::future<std::pair<uint64_t, uint64_t>> get_features(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  bool read_only);
+
+/// Resolve image name to image_id via rbd_directory (cls dir_get_id).
+seastar::future<std::string> get_image_id(
+  crimson::client::IoCtx& ioctx,
+  const std::string& image_name);
+
+/// Read metadata key from header object (cls metadata_get).
+seastar::future<std::string> metadata_get(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  const std::string& key);
+
+/// Write metadata key-value to header object (cls metadata_set).
+seastar::future<> metadata_set(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  const std::string& key,
+  const std::string& value);
+
+/// Update image size in header (cls set_size).
+seastar::future<> set_size(
+  crimson::client::IoCtx& ioctx,
+  const std::string& header_oid,
+  uint64_t size);
+
+} // namespace crimson::rbd
+
+#endif
diff --git a/src/crimson/rbd/utils.cc b/src/crimson/rbd/utils.cc
new file mode 100644 (file)
index 0000000..2f8c0e8
--- /dev/null
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#include "crimson/rbd/utils.h"
+
+#include "include/rbd_types.h"
+#include <algorithm>
+#include <cstdio>
+
+namespace crimson::rbd {
+
+/// Header object id: "<RBD_HEADER_PREFIX><image_id>".
+std::string header_name(const std::string& image_id)
+{
+  std::string oid{RBD_HEADER_PREFIX};
+  oid += image_id;
+  return oid;
+}
+
+/// Data object id: "<RBD_DATA_PREFIX><image_id>.<object_no as 16 hex digits>".
+/// Returns an empty string on truncation or an snprintf encoding error.
+std::string data_object_name(const std::string& image_id, uint64_t object_no)
+{
+  char name[RBD_MAX_OBJ_NAME_SIZE];
+  const int written = snprintf(name, sizeof(name), "%s%s.%016llx",
+                               RBD_DATA_PREFIX, image_id.c_str(),
+                               static_cast<unsigned long long>(object_no));
+  if (written < 0 || static_cast<size_t>(written) >= sizeof(name)) {
+    return {};
+  }
+  return std::string(name);
+}
+
+std::vector<ObjectExtent> extent_to_object_extents(
+  uint64_t offset, uint64_t length, uint8_t order)
+{
+  std::vector<ObjectExtent> extents;
+  if (length == 0) return extents;
+
+  const uint64_t object_size = 1ULL << order;
+  uint64_t off = offset;
+  uint64_t remaining = length;
+
+  while (remaining > 0) {
+    uint64_t object_no = off / object_size;
+    uint64_t object_off = off % object_size;
+    uint64_t chunk = std::min(remaining, object_size - object_off);
+
+    extents.push_back({object_no, object_off, chunk});
+    off += chunk;
+    remaining -= chunk;
+  }
+  return extents;
+}
+
+} // namespace crimson::rbd
diff --git a/src/crimson/rbd/utils.h b/src/crimson/rbd/utils.h
new file mode 100644 (file)
index 0000000..1edf9f9
--- /dev/null
@@ -0,0 +1,32 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#ifndef CEPH_CRIMSON_RBD_UTILS_H
+#define CEPH_CRIMSON_RBD_UTILS_H
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace crimson::rbd {
+
+/// RBD header object name: rbd_header.<image_id>
+std::string header_name(const std::string& image_id);
+
+/// RBD data object name: rbd_data.<image_id>.<object_no>
+/// Returns an empty string if the formatted name would not fit.
+std::string data_object_name(const std::string& image_id, uint64_t object_no);
+
+/// Object extent: (object_no, object_off, length) for block-to-object mapping.
+struct ObjectExtent {
+  uint64_t object_no;   // index of the data object within the image
+  uint64_t object_off;  // byte offset within that object
+  uint64_t length;      // bytes covered within that object
+};
+
+/// Map image (offset, length) to object extents. Simple layout: object_size = 1<<order,
+/// stripe_unit=object_size, stripe_count=1 (default RBD layout).
+std::vector<ObjectExtent> extent_to_object_extents(
+  uint64_t offset, uint64_t length, uint8_t order);
+
+} // namespace crimson::rbd
+
+#endif
index ae7b493d42f4bb4c43dd31bf83ac81ce87c31b25..dcfba0f7d3ff63a44f6bcbbc685fd42a62e21bd6 100644 (file)
@@ -32,6 +32,16 @@ target_link_libraries(crimson-rados-demo
   ${FMT_LIB})
 install(TARGETS crimson-rados-demo DESTINATION bin)
 
# Integration-test binary for librbd_crimson (src/crimson/tools/rbd_demo.cc);
# exercises rbd_open/stat/aio_read/aio_write against a live cluster.
add_executable(crimson-rbd-demo rbd_demo.cc)
target_link_libraries(crimson-rbd-demo
  crimson-main-config-bootstrap
  crimson
  librbd_crimson
  crimson-common
  legacy-option-headers
  ${FMT_LIB})
install(TARGETS crimson-rbd-demo DESTINATION bin)
+
 add_executable(perf-async-msgr perf_async_msgr.cc)
 target_link_libraries(perf-async-msgr ceph-common global ${ALLOC_LIBS})
 
diff --git a/src/crimson/tools/rbd_demo.cc b/src/crimson/tools/rbd_demo.cc
new file mode 100644 (file)
index 0000000..620ea32
--- /dev/null
@@ -0,0 +1,348 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+//
+// Integration test for librbd_crimson: connect, rbd_open, rbd_stat,
+// rbd_aio_read/write, rbd_close, rbd_metadata_get/set, rbd_resize.
+// Two bootstrap paths: C++ (default) or C API (--use-c-api).
+//
+// Usage: crimson-rbd-demo -c ceph.conf -n client.admin --pool rbd --image testimg
+//        crimson-rbd-demo --use-c-api -c ceph.conf -n client.admin --pool rbd --image testimg
+//
+// Prerequisites: Create RBD image first:
+//   rbd create -s 1M testimg   # or: rbd create -p rbd -s 1M testimg
+//
+
+#include <functional>
+#include <iostream>
+
+#include <seastar/core/app-template.hh>
+#include <seastar/core/print.hh>
+#include <seastar/util/closeable.hh>
+#include <seastar/util/defer.hh>
+
+#include "auth/KeyRing.h"
+#include "common/entity_name.h"
+#include "crimson/client/io_context.h"
+#include "crimson/client/rados_client.h"
+#include "crimson/osdc/objecter.h"
+#include "crimson/common/config_proxy.h"
+#include "crimson/common/fatal_signal.h"
+#include "crimson/common/log.h"
+#include "crimson/common/perf_counters_collection.h"
+#include "crimson/mon/MonClient.h"
+#include "crimson/net/Messenger.h"
+#include "crimson/osd/main_config_bootstrap_helpers.h"
+#include "crimson/rbd/api/crimson_rbd_internal.h"
+#include "msg/msg_types.h"
+#include "rbd/rbd_crimson.h"
+
+namespace bpo = boost::program_options;
+
// All demo output goes through the client-subsystem seastar logger.
static seastar::logger& logger() {
  return crimson::get_logger(ceph_subsys_client);
}
+
// Entry point: parse early crimson client config, start the Seastar app,
// then run the RBD smoke test via either the C++ bootstrap path (default)
// or the standalone C API bootstrap (--use-c-api).
int main(int argc, const char* argv[])
{
  // Early config handles -c/-n/--cluster before Seastar's own option parsing.
  auto early_result = crimson::osd::get_early_config_client(argc, argv);
  if (!early_result.has_value()) {
    std::cerr << "get_early_config_client failed: " << early_result.error()
              << std::endl;
    return early_result.error();
  }
  auto& early_config = early_result.value();

  seastar::app_template::config app_cfg;
  app_cfg.name = "crimson-rbd-demo";
  app_cfg.auto_handle_sigint_sigterm = false;
  seastar::app_template app(std::move(app_cfg));
  app.add_options()
    ("pool", bpo::value<std::string>()->default_value("rbd"),
     "pool name")
    ("image", bpo::value<std::string>()->default_value("testimg"),
     "RBD image name (create with: rbd create -s 1M <name>)")
    ("image-id", bpo::value<std::string>(),
     "RBD image id (optional; skips name lookup when get_image_id fails)")
    ("use-c-api", "use C API bootstrap (rbd_crimson_cluster_*) instead of C++")
    ("debug", "enable debug logging");

  try {
    return app.run(
      early_config.get_early_args().size(),
      const_cast<char**>(early_config.get_early_args().data()),
      [&] {
        auto& config = app.configuration();
        auto config_proxy_args = early_config.ceph_args;
        // seastar::async lets the body below be written in blocking style
        // (.get() on futures) inside a seastar thread.
        return seastar::async([config_proxy_args, &config, &early_config] {
          try {
            FatalSignal fatal_signal;
            if (config.count("debug")) {
              seastar::global_logger_registry().set_all_loggers_level(
                seastar::log_level::debug);
            }

            const auto pool_name = config["pool"].as<std::string>();
            const auto image_name = config["image"].as<std::string>();
            const bool use_c_api = config.count("use-c-api") != 0;
            const bool has_image_id = config.count("image-id") != 0;
            const auto image_id_opt = has_image_id
              ? std::optional<std::string>(config["image-id"].as<std::string>())
              : std::optional<std::string>();

            // Lambda to run RBD operations (shared by C++ and C API paths).
            // cleanup_ioctx is invoked only on the success path so the
            // caller's deferred destructor covers error returns.
            auto run_rbd_ops = [&](rbd_crimson_ioctx_t rbd_ioctx,
                                   std::function<void()> cleanup_ioctx) {
              rbd_image_t image = nullptr;
              int r;
              if (image_id_opt) {
                // Open directly by id, bypassing the rbd_id.<name> lookup.
                r = crimson::rbd::rbd_open_by_id(rbd_ioctx, image_id_opt->c_str(),
                                                 &image);
                if (r < 0) {
                  logger().error("rbd_open_by_id({}) failed: {}",
                                 *image_id_opt, r);
                  return EXIT_FAILURE;
                }
              } else {
                r = rbd_open(rbd_ioctx, image_name.c_str(), &image, nullptr);
                if (r < 0) {
                  logger().error("rbd_open({}) failed: {}", image_name, r);
                  return EXIT_FAILURE;
                }
              }
              auto close_image = seastar::defer([&] {
                rbd_close(image);
              });

              rbd_image_info_t info;
              r = rbd_stat(image, &info, sizeof(info));
              if (r < 0) {
                logger().error("rbd_stat failed: {}", r);
                return EXIT_FAILURE;
              }
              logger().info("rbd_stat: size={} order={} num_objs={}",
                           info.size, info.order, info.num_objs);

              uint64_t size = 0;
              r = rbd_get_size(image, &size);
              if (r < 0) {
                logger().error("rbd_get_size failed: {}", r);
                return EXIT_FAILURE;
              }
              logger().info("rbd_get_size: {}", size);

              // Phase 4: metadata_get, metadata_set, resize
              char meta_val[256];
              size_t meta_len = sizeof(meta_val);
              // NOTE(review): meta_val is logged as a C string below --
              // assumes rbd_metadata_get NUL-terminates the value; confirm.
              r = rbd_metadata_get(image, "test_key", meta_val, &meta_len);
              if (r == 0) {
                logger().info("rbd_metadata_get test_key: {}", meta_val);
              } else if (r != -ENOENT && r != -ERANGE) {
                logger().warn("rbd_metadata_get test_key: {} (not fatal)", r);
              }
              r = rbd_metadata_set(image, "test_key", "test_value");
              if (r < 0) {
                logger().warn("rbd_metadata_set failed: {} (not fatal)", r);
              } else {
                meta_len = sizeof(meta_val);
                r = rbd_metadata_get(image, "test_key", meta_val, &meta_len);
                if (r == 0) {
                  logger().info("rbd_metadata_get after set: {}", meta_val);
                }
              }
              // Resize round-trip: same size (no-op), grow, then shrink back
              // so the image is left as we found it.
              uint64_t orig_size = size;
              if (size >= 8192) {
                r = rbd_resize(image, size);  // no-op same size
                if (r < 0) logger().warn("rbd_resize same: {} (not fatal)", r);
              }
              r = rbd_resize(image, size + 4096);
              if (r < 0) {
                logger().warn("rbd_resize +4096 failed: {} (not fatal)", r);
              } else {
                r = rbd_get_size(image, &size);
                if (r == 0) logger().info("rbd_resize ok, new size: {}", size);
                r = rbd_resize(image, orig_size);  // shrink back
                if (r < 0) logger().warn("rbd_resize shrink: {} (not fatal)", r);
              }

              // Async write and read
              constexpr size_t io_len = 4096;
              std::vector<char> write_buf(io_len, 'x');
              std::vector<char> read_buf(io_len, 0);

              // NOTE(review): a single completion is reused for both the
              // write and the read; classic librbd completions are single
              // use -- confirm librbd_crimson supports reuse after
              // rbd_aio_wait_for_complete().
              rbd_completion_t comp = nullptr;
              r = rbd_aio_create_completion(nullptr,
                [](rbd_completion_t, void*) {}, &comp);
              if (r < 0) {
                logger().error("rbd_aio_create_completion failed: {}", r);
                return EXIT_FAILURE;
              }
              auto release_comp = seastar::defer([&] { rbd_aio_release(comp); });

              r = rbd_aio_write(image, 0, io_len, write_buf.data(), comp);
              if (r < 0) {
                logger().error("rbd_aio_write failed: {}", r);
                return EXIT_FAILURE;
              }
              rbd_aio_wait_for_complete(comp);
              ssize_t ret = rbd_aio_get_return_value(comp);
              if (ret < 0) {
                logger().error("rbd_aio_write completion: {}", static_cast<int>(ret));
                return EXIT_FAILURE;
              }
              logger().info("rbd_aio_write 0-{} ok", io_len);

              r = rbd_aio_read(image, 0, io_len, read_buf.data(), comp);
              if (r < 0) {
                logger().error("rbd_aio_read failed: {}", r);
                return EXIT_FAILURE;
              }
              rbd_aio_wait_for_complete(comp);
              ret = rbd_aio_get_return_value(comp);
              if (ret < 0) {
                logger().error("rbd_aio_read completion: {}", static_cast<int>(ret));
                return EXIT_FAILURE;
              }
              logger().info("rbd_aio_read 0-{} ok, ret={}", io_len, ret);

              // Round-trip verification: read back exactly what was written.
              if (memcmp(write_buf.data(), read_buf.data(), io_len) != 0) {
                logger().error("read mismatch");
                return EXIT_FAILURE;
              }
              logger().info("read/write verify ok");
              cleanup_ioctx();
              return EXIT_SUCCESS;
            };

            if (use_c_api) {
              // C API bootstrap path (standalone; see --use-c-api).
              // Build a flat key/value option list, skipping empty values.
              const char* config_opts_raw[] = {
                "config_file", early_config.conf_file_list.c_str(),
                "name", early_config.init_params.name.to_cstr(),
                "cluster", early_config.cluster_name.c_str(),
                "keyring", "keyring",  // match C++ path; vstart uses keyring in build dir
                nullptr
              };
              std::vector<const char*> config_opts;
              for (size_t i = 0; config_opts_raw[i]; i += 2) {
                if (config_opts_raw[i + 1] && strlen(config_opts_raw[i + 1]) > 0) {
                  config_opts.push_back(config_opts_raw[i]);
                  config_opts.push_back(config_opts_raw[i + 1]);
                }
              }
              if (config_opts.empty() || early_config.conf_file_list.empty()) {
                logger().error("C API path needs -c config_file and -n name");
                return EXIT_FAILURE;
              }

              rbd_crimson_cluster_t cluster = nullptr;
              int r = rbd_crimson_cluster_create(&cluster);
              if (r < 0 || !cluster) {
                logger().error("rbd_crimson_cluster_create failed: {}", r);
                return EXIT_FAILURE;
              }
              auto destroy_cluster = seastar::defer([&] {
                rbd_crimson_cluster_shutdown(cluster);
                rbd_crimson_cluster_destroy(cluster);
              });

              r = rbd_crimson_cluster_connect(cluster,
                config_opts.data(), config_opts.size());
              if (r < 0) {
                logger().error("rbd_crimson_cluster_connect failed: {}", r);
                return EXIT_FAILURE;
              }

              rbd_crimson_ioctx_t rbd_ioctx = nullptr;
              r = rbd_crimson_ioctx_create(cluster, pool_name.c_str(), &rbd_ioctx);
              if (r < 0 || !rbd_ioctx) {
                logger().error("rbd_crimson_ioctx_create failed: {}", r);
                return EXIT_FAILURE;
              }
              auto destroy_ioctx = seastar::defer([&] {
                rbd_crimson_ioctx_destroy(rbd_ioctx);
              });

              // On success run_rbd_ops destroys the ioctx itself and cancels
              // the deferred destructor to avoid a double-destroy.
              int result = run_rbd_ops(rbd_ioctx, [&] {
                rbd_crimson_ioctx_destroy(rbd_ioctx);
                destroy_ioctx.cancel();
              });
              if (result != EXIT_SUCCESS) return result;
              logger().info("crimson-rbd-demo completed successfully");
              return EXIT_SUCCESS;
            }

            // C++ bootstrap path: conf -> perf counters -> messenger ->
            // mon client -> rados client, each torn down in reverse by defer.
            crimson::common::sharded_conf().start(
              early_config.init_params.name,
              early_config.cluster_name).get();
            crimson::common::local_conf().start().get();
            auto stop_conf = seastar::deferred_stop(
              crimson::common::sharded_conf());
            crimson::common::sharded_perf_coll().start().get();
            auto stop_perf = seastar::deferred_stop(
              crimson::common::sharded_perf_coll());

            crimson::common::local_conf().parse_config_files(
              early_config.conf_file_list).get();
            crimson::common::local_conf().parse_env().get();
            crimson::common::local_conf().parse_argv(
              config_proxy_args).get();

            crimson::osd::populate_config_from_mon().get();

            // Demo does not enforce caps; accept any authenticated peer.
            class DemoAuthHandler : public crimson::common::AuthHandler {
            public:
              void handle_authentication(const EntityName& name,
                                        const AuthCapsInfo& caps) override {}
            };
            auto auth_handler = std::make_unique<DemoAuthHandler>();
            auto msgr = crimson::net::Messenger::create(
              entity_name_t(early_config.init_params.name.get_type(), -1),
              "rbd_demo",
              crimson::osd::get_nonce(),
              true);
            crimson::mon::Client monc(*msgr, *auth_handler);
            msgr->set_auth_client(&monc);
            msgr->set_auth_server(&monc);

            crimson::client::RadosClient rados(*msgr, monc);
            crimson::net::dispatchers_t dispatchers;
            dispatchers.push_back(&monc);
            dispatchers.push_back(&rados.get_objecter());
            msgr->start(dispatchers).get();
            auto stop_msgr = seastar::defer([&] {
              msgr->stop();
              msgr->shutdown().get();
            });

            monc.start().get();
            auto stop_monc = seastar::defer([&] { monc.stop().get(); });

            rados.get_objecter().set_client_incarnation(
              static_cast<int>(crimson::osd::get_nonce() & 0x7fffffff));
            rados.connect().get();
            auto ioctx = rados.create_ioctx(pool_name).get();

            // Wrap IoCtx for librbd_crimson C API
            auto rbd_ioctx = crimson::rbd::ioctx_create_from_iocontext(
              std::move(ioctx));
            auto destroy_ioctx = seastar::defer(
              [&] { rbd_crimson_ioctx_destroy(rbd_ioctx); });

            int result = run_rbd_ops(rbd_ioctx, []{});
            if (result != EXIT_SUCCESS) return result;

            rados.shutdown().get();
            logger().info("crimson-rbd-demo completed successfully");
            return EXIT_SUCCESS;
          } catch (const std::exception& e) {
            logger().error("crimson-rbd-demo failed: {}", e.what());
            return EXIT_FAILURE;
          }
        });
      });
  } catch (const std::exception& e) {
    std::cerr << "FATAL: " << e.what() << std::endl;
    return EXIT_FAILURE;
  }
}
index cb9c2fea8f80b708b4a4500853e91382355e0f96..0b6d9ae084c6f66ad59afa7e07dcae14e6b88a49 100644 (file)
@@ -30,6 +30,12 @@ if(WITH_RBD)
     DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rbd)
 endif()
 
# Install the crimson RBD C API header alongside the classic librbd headers
# so consumers can build against either implementation.
if(WITH_CRIMSON)
  install(FILES
    rbd/rbd_crimson.h
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rbd)
endif()
+
 if(WITH_RADOSGW)
   install(FILES
     rados/librgw.h
diff --git a/src/include/rbd/rbd_crimson.h b/src/include/rbd/rbd_crimson.h
new file mode 100644 (file)
index 0000000..b16b5fb
--- /dev/null
@@ -0,0 +1,140 @@
// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
// vim: ts=8 sw=2 sts=2 expandtab
//
// librbd_crimson C API - compatible with rbd/librbd.h
// Uses Crimson Objecter; no librados dependency.

#ifndef CEPH_RBD_CRIMSON_H
#define CEPH_RBD_CRIMSON_H

/* System headers must be included BEFORE entering the extern "C" block:
 * wrapping #include directives in C linkage can break any C++ declarations
 * those headers (transitively) introduce when this file is consumed from
 * a C++ translation unit. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

#if defined(__linux__)
#include <sys/uio.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Opaque handles - same as librbd for ABI compatibility */
typedef void *rbd_image_t;
typedef void *rbd_completion_t;
typedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg);

/* Crimson-specific: cluster and ioctx (replace rados_* for now) */
typedef void *rbd_crimson_cluster_t;
typedef void *rbd_crimson_ioctx_t;

/* rbd_stat / rbd_image_info_t equivalent */
typedef struct {
  uint64_t size;               /* image size in bytes */
  uint64_t obj_size;           /* backing object size in bytes (1 << order) */
  uint64_t num_objs;           /* number of backing objects */
  int order;                   /* log2 of obj_size */
  char block_name_prefix[24];  /* e.g. "rbd_data.<id>" */
  int64_t parent_pool;         /* -1 when no parent */
  char parent_name[96];
} rbd_image_info_t;

int rbd_crimson_cluster_create(rbd_crimson_cluster_t *cluster);
void rbd_crimson_cluster_destroy(rbd_crimson_cluster_t cluster);
int rbd_crimson_cluster_connect(rbd_crimson_cluster_t cluster,
                                const char *const *config_options,
                                size_t num_config_options);
void rbd_crimson_cluster_shutdown(rbd_crimson_cluster_t cluster);

/* Phase 6.2: Look up cluster by name (from connect with cluster config option). */
int rbd_crimson_cluster_get(const char *cluster_name,
                            rbd_crimson_cluster_t *cluster);

/* Get cluster addresses (nonce) for rbd_register_cluster. Caller frees *addrs. */
int rbd_crimson_cluster_getaddrs(rbd_crimson_cluster_t cluster, char **addrs);

/* Wait for OSDMap before issuing I/O. Optional; create_ioctx also waits. */
int rbd_crimson_cluster_wait_for_osdmap(rbd_crimson_cluster_t cluster);

int rbd_crimson_ioctx_create(rbd_crimson_cluster_t cluster,
                             const char *pool_name,
                             rbd_crimson_ioctx_t *ioctx);
void rbd_crimson_ioctx_destroy(rbd_crimson_ioctx_t ioctx);

int rbd_open(rbd_crimson_ioctx_t ioctx, const char *name,
             rbd_image_t *image, const char *snap_name);
int rbd_open_read_only(rbd_crimson_ioctx_t ioctx, const char *name,
                       rbd_image_t *image, const char *snap_name);
int rbd_close(rbd_image_t image);

int rbd_stat(rbd_image_t image, rbd_image_info_t *info, size_t infosize);
int rbd_get_size(rbd_image_t image, uint64_t *size);

/* Phase 4: Metadata and resize */
int rbd_metadata_get(rbd_image_t image, const char *key, char *value, size_t *val_len);
int rbd_metadata_set(rbd_image_t image, const char *key, const char *value);
int rbd_resize(rbd_image_t image, uint64_t size);

/* Watch (Phase 7 — stubbed, returns -ENOSYS) */
int rbd_update_watch(rbd_image_t image, uint64_t *handle,
                     void (*callback)(void *arg), void *arg);
int rbd_update_unwatch(rbd_image_t image, uint64_t handle);

/* Async I/O (Phase 2) */
int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len, char *buf,
                 rbd_completion_t c);
int rbd_aio_readv(rbd_image_t image, const struct iovec *iov, int iovcnt,
                  uint64_t off, rbd_completion_t c);
int rbd_aio_write(rbd_image_t image, uint64_t off, size_t len, const char *buf,
                  rbd_completion_t c);
int rbd_aio_writev(rbd_image_t image, const struct iovec *iov, int iovcnt,
                   uint64_t off, rbd_completion_t c);
int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len,
                    rbd_completion_t c);
int rbd_aio_flush(rbd_image_t image, rbd_completion_t c);
int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, size_t len,
                         rbd_completion_t c);
#if defined(__linux__)
int rbd_aio_compare_and_writev(rbd_image_t image, uint64_t off,
                               const struct iovec *cmp_iov, int cmp_iovcnt,
                               const struct iovec *iov, int iovcnt,
                               rbd_completion_t c, uint64_t *mismatch_off);
#endif

int rbd_flush(rbd_image_t image);

int rbd_aio_create_completion(void *cb_arg, rbd_callback_t complete_cb,
                              rbd_completion_t *c);
int rbd_aio_wait_for_complete(rbd_completion_t c);
int rbd_aio_is_complete(rbd_completion_t c);
ssize_t rbd_aio_get_return_value(rbd_completion_t c);
void *rbd_aio_get_arg(rbd_completion_t c);
void rbd_aio_release(rbd_completion_t c);

/* Phase A1: External-thread integration for SPDK reactor
 * When SPDK hosts the threads, each reactor thread runs a Seastar reactor.
 * Call these from SPDK reactor loop. Part of librbd_crimson; no direct
 * SPDK->Seastar dependency. Build ceph-nvmeof against librbd_crimson RPM.
 */

/** Configure Seastar for external-thread mode. Call once before spdk_reactors_start(). */
int rbd_crimson_configure_external_threads(unsigned core_count);

/** Register a Seastar reactor on the current thread. Call once per SPDK reactor thread. */
void rbd_crimson_register_reactor(unsigned shard_id);

/** Advance the Seastar reactor one tick. Call once per reactor loop iteration. Returns 1 if more work, 0 if stopped. */
int rbd_crimson_run_one_tick(void);

/** Clean up the Seastar reactor on the current thread. Call when SPDK reactor thread exits. */
void rbd_crimson_reactor_cleanup(void);

/** Clean up all Seastar resources. Call from main thread after spdk_reactors_fini. */
void rbd_crimson_cleanup_all(void);

#ifdef __cplusplus
}
#endif

#endif /* CEPH_RBD_CRIMSON_H */
index 7347cf6f4f966929d5dc5b3fd7e34d771c9b3f85..6e70097b40d2673fc9ca86b16ebd564e300e4592 160000 (submodule)
@@ -1 +1 @@
-Subproject commit 7347cf6f4f966929d5dc5b3fd7e34d771c9b3f85
+Subproject commit 6e70097b40d2673fc9ca86b16ebd564e300e4592
index 5c2c23849505007577722262b5d1e9970d0d498b..e0336e0448c56fa1944e331843b4bd1d2423bab3 100644 (file)
@@ -159,3 +159,11 @@ add_ceph_test(unittest-crimson-objecter
 target_link_libraries(unittest-crimson-objecter
   crimson
   crimson::gtest)
+
# Unit tests for librbd_crimson utility helpers; no cluster required
# (see src/test/crimson/test_rbd.cc).
add_executable(unittest-crimson-rbd
  test_rbd.cc)
add_ceph_unittest(unittest-crimson-rbd
  --memory 256M --smp 1)
target_link_libraries(unittest-crimson-rbd
  librbd_crimson
  GTest::Main)
diff --git a/src/test/crimson/test_rbd.cc b/src/test/crimson/test_rbd.cc
new file mode 100644 (file)
index 0000000..8a37acd
--- /dev/null
@@ -0,0 +1,81 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+//
+// Unit tests for librbd_crimson: utils (header_name, data_object_name,
+// extent_to_object_extents). No cluster required.
+//
+
+#include <gtest/gtest.h>
+
+#include "crimson/rbd/utils.h"
+
+using namespace crimson::rbd;
+
+TEST(rbd_utils, header_name)
+{
+  EXPECT_EQ(header_name("abc123"), "rbd_header.abc123");
+  EXPECT_EQ(header_name(""), "rbd_header.");
+}
+
+TEST(rbd_utils, data_object_name)
+{
+  EXPECT_EQ(data_object_name("abc123", 0), "rbd_data.abc123.0000000000000000");
+  EXPECT_EQ(data_object_name("abc123", 1), "rbd_data.abc123.0000000000000001");
+  EXPECT_EQ(data_object_name("id", 0x12345678),
+            "rbd_data.id.0000000012345678");
+}
+
+TEST(rbd_utils, extent_to_object_extents_empty)
+{
+  auto extents = extent_to_object_extents(0, 0, 22);
+  EXPECT_TRUE(extents.empty());
+}
+
+TEST(rbd_utils, extent_to_object_extents_single_object)
+{
+  // order 22 -> object_size 4MB
+  auto extents = extent_to_object_extents(0, 4096, 22);
+  ASSERT_EQ(extents.size(), 1u);
+  EXPECT_EQ(extents[0].object_no, 0u);
+  EXPECT_EQ(extents[0].object_off, 0u);
+  EXPECT_EQ(extents[0].length, 4096u);
+}
+
+TEST(rbd_utils, extent_to_object_extents_crosses_boundary)
+{
+  // order 12 -> object_size 4KB
+  auto extents = extent_to_object_extents(2048, 4096, 12);
+  ASSERT_EQ(extents.size(), 2u);
+  EXPECT_EQ(extents[0].object_no, 0u);
+  EXPECT_EQ(extents[0].object_off, 2048u);
+  EXPECT_EQ(extents[0].length, 2048u);
+  EXPECT_EQ(extents[1].object_no, 1u);
+  EXPECT_EQ(extents[1].object_off, 0u);
+  EXPECT_EQ(extents[1].length, 2048u);
+}
+
+TEST(rbd_utils, extent_to_object_extents_multiple_objects)
+{
+  // order 12 -> object_size 4KB
+  auto extents = extent_to_object_extents(0, 12288, 12);
+  ASSERT_EQ(extents.size(), 3u);
+  EXPECT_EQ(extents[0].object_no, 0u);
+  EXPECT_EQ(extents[0].object_off, 0u);
+  EXPECT_EQ(extents[0].length, 4096u);
+  EXPECT_EQ(extents[1].object_no, 1u);
+  EXPECT_EQ(extents[1].object_off, 0u);
+  EXPECT_EQ(extents[1].length, 4096u);
+  EXPECT_EQ(extents[2].object_no, 2u);
+  EXPECT_EQ(extents[2].object_off, 0u);
+  EXPECT_EQ(extents[2].length, 4096u);
+}
+
+TEST(rbd_utils, extent_to_object_extents_unaligned)
+{
+  // order 12 -> object_size 4KB; offset 100, length 500
+  auto extents = extent_to_object_extents(100, 500, 12);
+  ASSERT_EQ(extents.size(), 1u);
+  EXPECT_EQ(extents[0].object_no, 0u);
+  EXPECT_EQ(extents[0].object_off, 100u);
+  EXPECT_EQ(extents[0].length, 500u);
+}