From: Alexander Indenbaum Date: Mon, 23 Feb 2026 16:07:38 +0000 (+0200) Subject: crimson/rbd: add librbd_crimson Phase 1 skeleton X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fheads%2Fwip-baum-20260225-02;p=ceph-ci.git crimson/rbd: add librbd_crimson Phase 1 skeleton Implement rbd_open, rbd_close, rbd_stat, rbd_get_size; cluster, image header read (cls/rbd); name→id via rbd_id.. Cluster/ioctx C API stubbed for Phase 6. crimson/rbd: add Phase 2 async I/O - rbd_aio_read, rbd_aio_write Add block-to-object mapping (extent_to_object_extents), completion API, and rbd_aio_read/readv, rbd_aio_write/writev using IoCtx read/write. crimson/rbd: add Phase 3 - rbd_aio_discard, rbd_aio_flush, rbd_aio_write_zeroes, rbd_flush Implement UNMAP, FLUSH, WRITE_ZEROES; map discard/write_zeroes to IoCtx; flush is no-op (no client cache). librbd_crimson: add crimson-rbd-demo integration test, fix build crimson/rbd: implement cluster C API crimson/rbd: add rbd_metadata_get, rbd_metadata_set, rbd_resize crimson/rbd: add C API integration test, unit tests, and coverage matrix crimson/rbd: add external-thread API for SPDK Phase A1 integration Add C API for running Seastar reactors inside SPDK reactor threads: - rbd_crimson_configure_external_threads(): configure before spdk_reactors_start() - rbd_crimson_register_reactor(): register reactor on each SPDK reactor thread - rbd_crimson_run_one_tick(): advance reactor once per loop iteration - rbd_crimson_reactor_cleanup(): per-thread cleanup - rbd_crimson_cleanup_all(): global cleanup after spdk_reactors_fini Implement in api/external_thread.cc by wrapping Seastar app_template and reactor APIs. Lets SPDK link librbd_crimson for Phase A1 without depending on Seastar directly. 
Signed-off-by: Alexander Indenbaum CEPH-BUILD-FLAVOR: crimson --- diff --git a/ceph.spec.in b/ceph.spec.in index 40cf31e054b..a625cbd8642 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -2509,6 +2509,38 @@ fi %{_libdir}/librbd_tp.so %endif +%if 0%{with crimson} +%package -n librbd-crimson1 +Summary: Crimson RBD client library (Seastar-native) +%if 0%{?suse_version} +Group: System/Libraries +%endif +%description -n librbd-crimson1 +librbd-crimson is a Seastar-native RBD client library for the Ceph +distributed storage system. Requires Crimson build (WITH_CRIMSON=ON). + +%package -n librbd-crimson-devel +Summary: Crimson RBD client headers +%if 0%{?suse_version} +Group: Development/Libraries/C and C++ +%endif +Requires: librbd-crimson1 = %{_epoch_prefix}%{version}-%{release} +Provides: librbd-crimson1-devel = %{_epoch_prefix}%{version}-%{release} +%description -n librbd-crimson-devel +Headers and development files for librbd-crimson + +%files -n librbd-crimson1 +%{_libdir}/librbd_crimson.so.* + +%post -n librbd-crimson1 -p /sbin/ldconfig + +%postun -n librbd-crimson1 -p /sbin/ldconfig + +%files -n librbd-crimson-devel +%{_includedir}/rbd/rbd_crimson.h +%{_libdir}/librbd_crimson.so +%endif + %files -n librgw2 %{_libdir}/librgw.so.* %if %{with lttng} diff --git a/src/crimson/CMakeLists.txt b/src/crimson/CMakeLists.txt index d1236a41c95..4f1c4883881 100644 --- a/src/crimson/CMakeLists.txt +++ b/src/crimson/CMakeLists.txt @@ -207,4 +207,5 @@ target_link_libraries(crimson add_subdirectory(admin) add_subdirectory(os) add_subdirectory(osd) +add_subdirectory(rbd) add_subdirectory(tools) diff --git a/src/crimson/rbd/CMakeLists.txt b/src/crimson/rbd/CMakeLists.txt new file mode 100644 index 00000000000..aba50a61eed --- /dev/null +++ b/src/crimson/rbd/CMakeLists.txt @@ -0,0 +1,47 @@ +# librbd_crimson - Seastar-native RBD client + +set(crimson_rbd_srcs + utils.cc + image_header.cc + cluster.cc + image.cc + completion.cc + api/open.cc + api/io.cc + api/cluster.cc + 
api/metadata.cc + api/external_thread.cc +) + +# Build shared lib when ENABLE_SHARED (for RPM packaging); else static +add_library(librbd_crimson ${CEPH_SHARED} ${crimson_rbd_srcs}) + +target_include_directories(librbd_crimson + PUBLIC + ${CMAKE_SOURCE_DIR}/src/include + ${CMAKE_SOURCE_DIR}/src + PRIVATE + ${CMAKE_SOURCE_DIR}/src/cls/rbd +) + +target_link_libraries(librbd_crimson + PUBLIC + crimson + crimson::cflags + PRIVATE + cls_rbd_client +) + +if(ENABLE_SHARED) + set_target_properties(librbd_crimson PROPERTIES + OUTPUT_NAME rbd_crimson + VERSION 1.0.0 + SOVERSION 1 + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON) +endif() + +install(TARGETS librbd_crimson + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/src/crimson/rbd/api/cluster.cc b/src/crimson/rbd/api/cluster.cc new file mode 100644 index 00000000000..1cc2a780fad --- /dev/null +++ b/src/crimson/rbd/api/cluster.cc @@ -0,0 +1,309 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// +// C API: rbd_crimson_cluster_connect, rbd_crimson_ioctx_create, +// rbd_crimson_cluster_wait_for_osdmap, rbd_crimson_cluster_get. Phase 6. +// +// Must be called from within a Seastar context (e.g. app.run() lambda). +// Config options: key-value pairs, e.g. config_file, keyring, name, cluster. +// assume_bootstrapped=1: skip conf/perf start (app already did C++ bootstrap). 
+// + +#include "crimson/rbd/api/crimson_rbd_impl.h" +#include "crimson/rbd/api/crimson_rbd_internal.h" + +#include +#include +#include +#include +#include +#include + +#include "auth/KeyRing.h" +#include "common/ceph_argparse.h" +#include "crimson/client/io_context.h" +#include "crimson/client/rados_client.h" +#include "crimson/common/auth_handler.h" +#include "crimson/common/config_proxy.h" +#include "crimson/common/log.h" +#include "crimson/common/perf_counters_collection.h" +#include "crimson/mon/MonClient.h" +#include "crimson/net/Messenger.h" +#include "crimson/osd/main_config_bootstrap_helpers.h" +#include "crimson/osdc/objecter.h" +#include + +#include "msg/msg_types.h" +#include "rbd/rbd_crimson.h" + +namespace { + +seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_client); +} + +} // namespace + +/// Phase 6.2: shared cluster map (cluster_name -> cluster) +static std::mutex g_cluster_map_mutex; +static std::map g_cluster_map; + +namespace { + +/// Parse config_options (key,val,key,val,...,NULL) into map. +/// Supports: config_file/conf_file, keyring/keyfile, name, cluster. +std::map +parse_config_options(const char* const* config_options, size_t num_config_options) +{ + std::map opts; + if (!config_options) return opts; + size_t i = 0; + if (num_config_options > 0) { + while (i + 1 < num_config_options) { + const char* k = config_options[i]; + const char* v = config_options[i + 1]; + if (!k || !v) break; + opts[std::string(k)] = std::string(v); + i += 2; + } + } else { + while (config_options[i] && config_options[i + 1]) { + opts[std::string(config_options[i])] = std::string(config_options[i + 1]); + i += 2; + } + } + return opts; +} + +/// Build argv from options for ceph_argparse_early_args and parse_argv. 
+std::vector build_argv(const std::map& opts) +{ + std::vector args = {"crimson_rbd"}; + auto add = [&](const std::string& key, const char* arg) { + auto it = opts.find(key); + if (it != opts.end() && !it->second.empty()) { + args.push_back(arg); + args.push_back(it->second); + } + }; + add("config_file", "-c"); + add("conf_file", "-c"); + add("keyring", "-k"); + add("keyfile", "-k"); + add("name", "-n"); + add("cluster", "--cluster"); + add("assume_bootstrapped", ""); // no argv; handled separately + return args; +} + +} // namespace + +extern "C" { + +int rbd_crimson_cluster_connect(rbd_crimson_cluster_t cluster, + const char* const* config_options, + size_t num_config_options) +{ + if (!cluster) return -EINVAL; + auto* c = static_cast(cluster); + if (c->connected) return 0; // idempotent + + auto opts = parse_config_options(config_options, num_config_options); + const bool assume_bootstrapped = + opts.count("assume_bootstrapped") && opts["assume_bootstrapped"] == "1"; + auto args_str = build_argv(opts); + if (!assume_bootstrapped && args_str.size() < 2) { + logger().error("rbd_crimson_cluster_connect: need config (e.g. 
config_file, name)"); + return -EINVAL; + } + if (assume_bootstrapped && args_str.size() < 2) { + args_str.push_back("-n"); + args_str.push_back("client.admin"); + } + + std::vector args; + for (const auto& s : args_str) { + args.push_back(s.c_str()); + } + + std::string cluster_name = "ceph"; + std::string conf_file_list; + auto init_params = ceph_argparse_early_args( + args, CEPH_ENTITY_TYPE_CLIENT, &cluster_name, &conf_file_list); + + try { + if (!assume_bootstrapped) { + c->owns_conf_perf = true; + crimson::common::sharded_conf().start( + init_params.name, cluster_name).get(); + crimson::common::local_conf().start().get(); + crimson::common::sharded_perf_coll().start().get(); + + crimson::common::local_conf().parse_config_files(conf_file_list).get(); + crimson::common::local_conf().parse_env().get(); + crimson::common::local_conf().parse_argv(args_str).get(); + + crimson::osd::populate_config_from_mon().get(); + } + + class CephAuthHandler : public crimson::common::AuthHandler { + public: + void handle_authentication(const EntityName&, const AuthCapsInfo&) override {} + }; + c->auth_handler = std::make_unique(); + c->msgr = crimson::net::Messenger::create( + entity_name_t(init_params.name.get_type(), -1), + "rbd_crimson", + crimson::osd::get_nonce(), + true); + c->monc = std::make_unique(*c->msgr, *c->auth_handler); + c->msgr->set_auth_client(c->monc.get()); + c->msgr->set_auth_server(c->monc.get()); + + c->rados = std::make_unique(*c->msgr, *c->monc); + crimson::net::dispatchers_t dispatchers; + dispatchers.push_back(c->monc.get()); + dispatchers.push_back(&c->rados->get_objecter()); + c->msgr->start(dispatchers).get(); + + c->monc->start().get(); + c->rados->get_objecter().set_client_incarnation( + static_cast(crimson::osd::get_nonce() & 0x7fffffff)); + c->rados->connect().get(); + c->connected = true; + c->cluster_name = cluster_name; + if (!cluster_name.empty()) { + std::lock_guard lock(g_cluster_map_mutex); + g_cluster_map[cluster_name] = c; + } + return 
0; + } catch (const std::system_error& e) { + logger().error("rbd_crimson_cluster_connect failed: {}", e.what()); + return e.code().value() > 0 ? -e.code().value() : e.code().value(); + } catch (...) { + logger().error("rbd_crimson_cluster_connect failed"); + return -EIO; + } +} + +void rbd_crimson_cluster_shutdown(rbd_crimson_cluster_t cluster) +{ + if (!cluster) return; + auto* c = static_cast(cluster); + if (!c->connected) return; + try { + if (c->rados) c->rados->shutdown().get(); + if (c->monc) c->monc->stop().get(); + if (c->msgr) { + c->msgr->stop(); + c->msgr->shutdown().get(); + } + } catch (...) {} + c->rados.reset(); + c->monc.reset(); + c->msgr = nullptr; + c->auth_handler.reset(); + if (!c->cluster_name.empty()) { + std::lock_guard lock(g_cluster_map_mutex); + g_cluster_map.erase(c->cluster_name); + } + c->cluster_name.clear(); + c->connected = false; + if (c->owns_conf_perf) { + c->owns_conf_perf = false; + try { + crimson::common::sharded_perf_coll().stop().get(); + crimson::common::local_conf().stop().get(); + crimson::common::sharded_conf().stop().get(); + } catch (...) 
{} + } +} + +int rbd_crimson_cluster_get(const char* cluster_name, + rbd_crimson_cluster_t* cluster) +{ + if (!cluster_name || !cluster_name[0] || !cluster) return -EINVAL; + std::lock_guard lock(g_cluster_map_mutex); + auto it = g_cluster_map.find(cluster_name); + if (it == g_cluster_map.end()) return -ENOENT; + *cluster = it->second; + return 0; +} + +int rbd_crimson_cluster_getaddrs(rbd_crimson_cluster_t cluster, char** addrs) +{ + if (!cluster || !addrs) return -EINVAL; + auto* c = static_cast(cluster); + if (!c->connected || !c->msgr) return -ENOTCONN; + std::ostringstream os; + os << c->msgr->get_myaddrs(); + std::string s = os.str(); + *addrs = strdup(s.c_str()); + return 0; +} + +int rbd_crimson_ioctx_create(rbd_crimson_cluster_t cluster, + const char* pool_name, + rbd_crimson_ioctx_t* ioctx) +{ + if (!cluster || !pool_name || !ioctx) return -EINVAL; + auto* c = static_cast(cluster); + if (!c->connected || !c->rados) return -ENOTCONN; + + try { + auto ioc = c->rados->create_ioctx(pool_name).get(); + *ioctx = new crimson::rbd::crimson_rbd_ioctx(std::move(ioc)); + return 0; + } catch (const std::system_error& e) { + return e.code().value() > 0 ? -e.code().value() : e.code().value(); + } catch (...) { + return -EIO; + } +} + +int rbd_crimson_cluster_wait_for_osdmap(rbd_crimson_cluster_t cluster) +{ + if (!cluster) return -EINVAL; + auto* c = static_cast(cluster); + if (!c->connected || !c->rados) return -ENOTCONN; + + try { + c->rados->get_objecter().wait_for_osdmap().get(); + return 0; + } catch (const std::system_error& e) { + return e.code().value() > 0 ? -e.code().value() : e.code().value(); + } catch (...) { + return -EIO; + } +} + +} // extern "C" + +namespace crimson::rbd { + +crimson_rbd_cluster::~crimson_rbd_cluster() = default; + +int create_cluster_impl(rbd_crimson_cluster_t* cluster) +{ + if (!cluster) return -EINVAL; + try { + *cluster = new crimson_rbd_cluster(); + return 0; + } catch (...) 
{ + return -ENOMEM; + } +} + +void destroy_cluster_impl(rbd_crimson_cluster_t cluster) +{ + delete static_cast(cluster); +} + +void unregister_cluster_from_map(rbd_crimson_cluster_t cluster) +{ + auto* c = static_cast(cluster); + if (!c || c->cluster_name.empty()) return; + std::lock_guard lock(g_cluster_map_mutex); + g_cluster_map.erase(c->cluster_name); +} + +} // namespace crimson::rbd diff --git a/src/crimson/rbd/api/crimson_rbd_impl.h b/src/crimson/rbd/api/crimson_rbd_impl.h new file mode 100644 index 00000000000..85fec3d535f --- /dev/null +++ b/src/crimson/rbd/api/crimson_rbd_impl.h @@ -0,0 +1,62 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// +// Internal structs for C API implementation. Shared by open.cc, io.cc, cluster.cc. +// + +#ifndef CEPH_CRIMSON_RBD_API_CRIMSON_RBD_IMPL_H +#define CEPH_CRIMSON_RBD_API_CRIMSON_RBD_IMPL_H + +#include +#include + +#include "crimson/client/io_context.h" +#include "crimson/net/Fwd.h" +#include "crimson/rbd/image.h" + +namespace crimson { + +namespace common { +class AuthHandler; +} +namespace mon { +class Client; +} +namespace net { +class Messenger; +} + +namespace client { +class RadosClient; +} + +} // namespace crimson + +namespace crimson::rbd { + +struct crimson_rbd_ioctx { + crimson::client::IoCtx ioctx; + explicit crimson_rbd_ioctx(crimson::client::IoCtx ioc) : ioctx(std::move(ioc)) {} +}; + +struct crimson_rbd_image { + std::unique_ptr image; + explicit crimson_rbd_image(std::unique_ptr img) : image(std::move(img)) {} +}; + +struct crimson_rbd_cluster { + crimson_rbd_cluster() = default; + std::string cluster_name; // Phase 6.2: for shared-cluster map + /// Destructor defined in cluster.cc (needs complete types). 
+ ~crimson_rbd_cluster(); + + bool connected = false; + bool owns_conf_perf = false; // true if we started sharded_conf/sharded_perf_coll + std::unique_ptr auth_handler; + crimson::net::MessengerRef msgr; + std::unique_ptr monc; + std::unique_ptr rados; +}; + +} // namespace crimson::rbd + +#endif diff --git a/src/crimson/rbd/api/crimson_rbd_internal.h b/src/crimson/rbd/api/crimson_rbd_internal.h new file mode 100644 index 00000000000..210255fdb5e --- /dev/null +++ b/src/crimson/rbd/api/crimson_rbd_internal.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// +// Internal C++ API for integration tests. +// Creates rbd_crimson_ioctx_t from IoCtx when full cluster C API is not yet available (Phase 6). +// + +#ifndef CEPH_CRIMSON_RBD_API_CRIMSON_RBD_INTERNAL_H +#define CEPH_CRIMSON_RBD_API_CRIMSON_RBD_INTERNAL_H + +#include "rbd/rbd_crimson.h" + +namespace crimson::client { +class IoCtx; +} + +namespace crimson::rbd { + +/// Create rbd_crimson_ioctx_t from existing IoCtx. For integration tests when +/// rbd_crimson_cluster_connect / rbd_crimson_ioctx_create are not yet implemented. +/// Caller must call rbd_crimson_ioctx_destroy when done. +rbd_crimson_ioctx_t ioctx_create_from_iocontext(crimson::client::IoCtx ioctx); + +/// Open RBD image by id (skips name->id lookup). For integration tests when +/// get_image_id fails due to auth/layout (crimson mon auth vs vstart). +int rbd_open_by_id(rbd_crimson_ioctx_t ioctx, const char* image_id, + rbd_image_t* image); + +/// Unregister cluster from shared map (called from cluster_destroy). +void unregister_cluster_from_map(rbd_crimson_cluster_t cluster); + +/// Create cluster (must be in cluster.cc for complete types). +int create_cluster_impl(rbd_crimson_cluster_t* cluster); + +/// Destroy cluster (must be in cluster.cc for complete types). 
+void destroy_cluster_impl(rbd_crimson_cluster_t cluster); + +} // namespace crimson::rbd + +#endif diff --git a/src/crimson/rbd/api/external_thread.cc b/src/crimson/rbd/api/external_thread.cc new file mode 100644 index 00000000000..19ae9f35725 --- /dev/null +++ b/src/crimson/rbd/api/external_thread.cc @@ -0,0 +1,73 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 sts=2 expandtab +// +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (C) 2025 Red Hat +// +// Phase A1: External-thread integration for SPDK reactor. +// Bridges SPDK reactor loop to Seastar run_one_tick / register APIs. +// Part of librbd_crimson; SPDK links librbd_crimson, no direct Seastar dep. + +#include "include/rbd/rbd_crimson.h" + +#include + +#include +#include +#include +#include + +namespace { + +std::unique_ptr g_app; + +} // anonymous namespace + +extern "C" { + +int rbd_crimson_configure_external_threads(unsigned core_count) +{ + try { + seastar::app_template::seastar_options opts; + opts.smp_opts.thread_affinity.set_value(false); + opts.smp_opts.mbind.set_value(false); + opts.smp_opts.smp.set_value(core_count); + opts.smp_opts.lock_memory.set_value(false); + opts.log_opts.default_log_level.set_value(seastar::log_level::error); + opts.reactor_opts.no_handle_interrupt.set_value(true); + + g_app = std::make_unique(std::move(opts)); + if (g_app->configure_external_thread_mode(core_count) != 0) { + g_app.reset(); + return -1; + } + return 0; + } catch (...) { + return -1; + } +} + +void rbd_crimson_register_reactor(unsigned shard_id) +{ + g_app->register_reactor_on_this_thread(shard_id); +} + +int rbd_crimson_run_one_tick(void) +{ + return seastar::engine().run_one_tick() ? 
1 : 0; +} + +void rbd_crimson_reactor_cleanup(void) +{ + seastar::smp::cleanup_cpu(); +} + +void rbd_crimson_cleanup_all(void) +{ + if (g_app) { + g_app->cleanup_external_thread_mode(); + g_app.reset(); + } +} + +} // extern "C" diff --git a/src/crimson/rbd/api/io.cc b/src/crimson/rbd/api/io.cc new file mode 100644 index 00000000000..6154b48995d --- /dev/null +++ b/src/crimson/rbd/api/io.cc @@ -0,0 +1,560 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// +// C API: rbd_aio_read, rbd_aio_write, rbd_aio_readv, rbd_aio_writev, +// rbd_aio_create_completion, rbd_aio_release, rbd_aio_get_arg, rbd_aio_get_return_value. +// + +#include "crimson/rbd/api/crimson_rbd_impl.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include "include/buffer.h" +#include "rbd/rbd_crimson.h" +#include "crimson/rbd/completion.h" +#include "crimson/rbd/utils.h" + +namespace crimson::rbd { +namespace { + +void schedule_aio_read(crimson_rbd_image* img, uint64_t off, size_t len, + char* buf, crimson_rbd_completion* c); +void schedule_aio_readv(crimson_rbd_image* img, uint64_t off, size_t len, + const struct iovec* iov, int iovcnt, crimson_rbd_completion* c); +void schedule_aio_write(crimson_rbd_image* img, uint64_t off, size_t len, + const char* buf, crimson_rbd_completion* c); +void schedule_aio_write_with_buf(crimson_rbd_image* img, uint64_t off, size_t len, + std::shared_ptr> buf, + crimson_rbd_completion* c); +void schedule_aio_discard(crimson_rbd_image* img, uint64_t off, uint64_t len, + crimson_rbd_completion* c); +void schedule_aio_flush(crimson_rbd_image* img, crimson_rbd_completion* c); +void schedule_aio_write_zeroes(crimson_rbd_image* img, uint64_t off, size_t len, + crimson_rbd_completion* c); +void schedule_aio_compare_and_writev(crimson_rbd_image* img, uint64_t off, + const struct iovec* cmp_iov, int cmp_iovcnt, + const struct iovec* iov, int iovcnt, + crimson_rbd_completion* c, uint64_t* mismatch_off); + +} // 
namespace +} // namespace crimson::rbd + +extern "C" { + +using namespace crimson::rbd; + +// --- completion API --- + +int rbd_aio_create_completion(void* cb_arg, rbd_callback_t complete_cb, + rbd_completion_t* c) +{ + if (!c) return -EINVAL; + try { + auto* comp = new crimson_rbd_completion(); + comp->callback_arg = cb_arg; + comp->complete_cb = complete_cb; + *c = comp; + return 0; + } catch (...) { + return -ENOMEM; + } +} + +int rbd_aio_wait_for_complete(rbd_completion_t c) +{ + auto* comp = static_cast(c); + if (!comp) return -EINVAL; + while (!comp->complete.load(std::memory_order_acquire)) { + seastar::sleep(std::chrono::microseconds(10)).get(); + } + return 0; +} + +int rbd_aio_is_complete(rbd_completion_t c) +{ + auto* comp = static_cast(c); + return comp && comp->complete.load(std::memory_order_acquire) ? 1 : 0; +} + +ssize_t rbd_aio_get_return_value(rbd_completion_t c) +{ + auto* comp = static_cast(c); + return comp ? comp->rval.load(std::memory_order_acquire) : -EINVAL; +} + +void* rbd_aio_get_arg(rbd_completion_t c) +{ + auto* comp = static_cast(c); + return comp ? 
comp->callback_arg : nullptr; +} + +void rbd_aio_release(rbd_completion_t c) +{ + delete static_cast(c); +} + +// --- aio read / write --- + +int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len, char* buf, + rbd_completion_t c) +{ + if (!image || !buf || !c) return -EINVAL; + auto* img = static_cast(image); + auto* comp = static_cast(c); + if (!img->image || !comp) return -EINVAL; + + schedule_aio_read(img, off, len, buf, comp); + return 0; +} + +int rbd_aio_write(rbd_image_t image, uint64_t off, size_t len, const char* buf, + rbd_completion_t c) +{ + if (!image || !buf || !c) return -EINVAL; + auto* img = static_cast(image); + auto* comp = static_cast(c); + if (!img->image || !comp) return -EINVAL; + + schedule_aio_write(img, off, len, buf, comp); + return 0; +} + +int rbd_aio_readv(rbd_image_t image, const struct iovec* iov, int iovcnt, + uint64_t off, rbd_completion_t c) +{ + if (!image || !iov || iovcnt <= 0 || !c) return -EINVAL; + size_t total = 0; + for (int i = 0; i < iovcnt; ++i) { + total += iov[i].iov_len; + } + if (total == 0) return -EINVAL; + + schedule_aio_readv(static_cast(image), off, total, + iov, iovcnt, static_cast(c)); + return 0; +} + +int rbd_aio_writev(rbd_image_t image, const struct iovec* iov, int iovcnt, + uint64_t off, rbd_completion_t c) +{ + if (!image || !iov || iovcnt <= 0 || !c) return -EINVAL; + size_t total = 0; + for (int i = 0; i < iovcnt; ++i) { + total += iov[i].iov_len; + } + if (total == 0) return -EINVAL; + + auto buf = std::make_shared>(total); + size_t pos = 0; + for (int i = 0; i < iovcnt; ++i) { + memcpy(buf->data() + pos, iov[i].iov_base, iov[i].iov_len); + pos += iov[i].iov_len; + } + schedule_aio_write_with_buf(static_cast(image), off, total, + buf, static_cast(c)); + return 0; +} + +// --- aio discard / flush / write_zeroes (Phase 3) --- + +int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, + rbd_completion_t c) +{ + if (!image || !c) return -EINVAL; + auto* img = static_cast(image); + auto* 
comp = static_cast(c); + if (!img->image || !comp) return -EINVAL; + + schedule_aio_discard(img, off, len, comp); + return 0; +} + +int rbd_aio_flush(rbd_image_t image, rbd_completion_t c) +{ + if (!image || !c) return -EINVAL; + auto* img = static_cast(image); + auto* comp = static_cast(c); + if (!img->image || !comp) return -EINVAL; + + schedule_aio_flush(img, comp); + return 0; +} + +int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, size_t len, + rbd_completion_t c) +{ + if (!image || !c) return -EINVAL; + auto* img = static_cast(image); + auto* comp = static_cast(c); + if (!img->image || !comp) return -EINVAL; + + schedule_aio_write_zeroes(img, off, len, comp); + return 0; +} + +#if defined(__linux__) +int rbd_aio_compare_and_writev(rbd_image_t image, uint64_t off, + const struct iovec* cmp_iov, int cmp_iovcnt, + const struct iovec* iov, int iovcnt, + rbd_completion_t c, uint64_t* mismatch_off) +{ + if (!image || !c) return -EINVAL; + auto* img = static_cast(image); + auto* comp = static_cast(c); + if (!img->image || !comp) return -EINVAL; + if (!cmp_iov || cmp_iovcnt < 1 || !iov || iovcnt < 1) return -EINVAL; + + schedule_aio_compare_and_writev(img, off, cmp_iov, cmp_iovcnt, + iov, iovcnt, comp, mismatch_off); + return 0; +} +#endif + +int rbd_flush(rbd_image_t image) +{ + if (!image) return -EINVAL; + (void)image; + return 0; // No client-side cache; writes go direct to OSD +} + +} // extern "C" + +namespace crimson::rbd { +namespace { + +void schedule_aio_read(crimson_rbd_image* img, uint64_t off, size_t len, + char* buf, crimson_rbd_completion* c) +{ + c->complete.store(false); // allow reuse for rbd_aio_wait_for_complete + if (len == 0) { + c->set_complete(0); + return; + } + if (off + len > img->image->get_size()) { + c->set_complete(-EINVAL); + return; + } + + const auto& image_id = img->image->get_image_id(); + auto& ioctx = img->image->get_ioctx(); + uint8_t order = img->image->get_order(); + + auto extents = extent_to_object_extents(off, len, 
order); + std::vector> futures; + for (const auto& ex : extents) { + std::string oid = data_object_name(image_id, ex.object_no); + futures.push_back(ioctx.read(oid, ex.object_off, ex.length)); + } + + (void)seastar::when_all_succeed(futures.begin(), futures.end()) + .then([buf, len, c](std::vector results) { + size_t offset = 0; + for (auto& bl : results) { + size_t n = std::min(static_cast(bl.length()), len - offset); + bl.begin().copy(n, buf + offset); + offset += n; + } + c->set_complete(static_cast(offset)); + }) + .handle_exception([c](std::exception_ptr e) { + try { + std::rethrow_exception(e); + } catch (const std::system_error& se) { + c->set_complete(se.code().value()); + } catch (...) { + c->set_complete(-EIO); + } + }); +} + +void schedule_aio_readv(crimson_rbd_image* img, uint64_t off, size_t len, + const struct iovec* iov, int iovcnt, crimson_rbd_completion* c) +{ + c->complete.store(false); + if (len == 0) { + c->set_complete(0); + return; + } + if (off + len > img->image->get_size()) { + c->set_complete(-EINVAL); + return; + } + + const auto& image_id = img->image->get_image_id(); + auto& ioctx = img->image->get_ioctx(); + uint8_t order = img->image->get_order(); + auto iov_copy = std::make_shared>(iov, iov + iovcnt); + + auto extents = extent_to_object_extents(off, len, order); + std::vector> futures; + for (const auto& ex : extents) { + std::string oid = data_object_name(image_id, ex.object_no); + futures.push_back(ioctx.read(oid, ex.object_off, ex.length)); + } + + (void)seastar::when_all_succeed(futures.begin(), futures.end()) + .then([c, iov_copy](std::vector results) { + size_t total_copied = 0; + size_t iov_idx = 0; + size_t iov_off = 0; + for (auto& bl : results) { + size_t src_off = 0; + size_t remain = bl.length(); + while (remain > 0 && iov_idx < iov_copy->size()) { + size_t space = (*iov_copy)[iov_idx].iov_len - iov_off; + size_t n = std::min(remain, space); + if (n > 0) { + bl.begin().copy(n, static_cast((*iov_copy)[iov_idx].iov_base) + 
iov_off); + total_copied += n; + src_off += n; + remain -= n; + iov_off += n; + if (iov_off >= (*iov_copy)[iov_idx].iov_len) { + iov_idx++; + iov_off = 0; + } + } + } + } + c->set_complete(static_cast(total_copied)); + }) + .handle_exception([c](std::exception_ptr e) { + try { + std::rethrow_exception(e); + } catch (const std::system_error& se) { + c->set_complete(se.code().value()); + } catch (...) { + c->set_complete(-EIO); + } + }); +} + +void schedule_aio_write(crimson_rbd_image* img, uint64_t off, size_t len, + const char* buf, crimson_rbd_completion* c) +{ + c->complete.store(false); + schedule_aio_write_with_buf(img, off, len, + std::make_shared>(buf, buf + len), c); +} + +void schedule_aio_write_with_buf(crimson_rbd_image* img, uint64_t off, size_t len, + std::shared_ptr> buf, + crimson_rbd_completion* c) +{ + c->complete.store(false); + if (img->image->is_read_only()) { + c->set_complete(-EACCES); + return; + } + if (len == 0) { + c->set_complete(0); + return; + } + if (off + len > img->image->get_size()) { + c->set_complete(-EINVAL); + return; + } + + const auto& image_id = img->image->get_image_id(); + auto& ioctx = img->image->get_ioctx(); + uint8_t order = img->image->get_order(); + + auto extents = extent_to_object_extents(off, len, order); + std::vector> futures; + size_t buf_offset = 0; + for (const auto& ex : extents) { + ceph::bufferlist bl; + bl.append(buf->data() + buf_offset, ex.length); + buf_offset += ex.length; + std::string oid = data_object_name(image_id, ex.object_no); + futures.push_back(ioctx.write(oid, ex.object_off, std::move(bl))); + } + + (void)seastar::when_all_succeed(futures.begin(), futures.end()) + .then([c]() { c->set_complete(0); }) + .handle_exception([c](std::exception_ptr e) { + try { + std::rethrow_exception(e); + } catch (const std::system_error& se) { + c->set_complete(se.code().value()); + } catch (...) 
{ + c->set_complete(-EIO); + } + }); +} + +void schedule_aio_discard(crimson_rbd_image* img, uint64_t off, uint64_t len, + crimson_rbd_completion* c) +{ + c->complete.store(false); + if (img->image->is_read_only()) { + c->set_complete(-EACCES); + return; + } + if (len == 0) { + c->set_complete(0); + return; + } + if (off + len > img->image->get_size()) { + c->set_complete(-EINVAL); + return; + } + + const auto& image_id = img->image->get_image_id(); + auto& ioctx = img->image->get_ioctx(); + uint8_t order = img->image->get_order(); + + auto extents = extent_to_object_extents(off, len, order); + std::vector> futures; + for (const auto& ex : extents) { + std::string oid = data_object_name(image_id, ex.object_no); + futures.push_back(ioctx.discard(oid, ex.object_off, ex.length)); + } + + (void)seastar::when_all_succeed(futures.begin(), futures.end()) + .then([c]() { c->set_complete(0); }) + .handle_exception([c](std::exception_ptr e) { + try { + std::rethrow_exception(e); + } catch (const std::system_error& se) { + c->set_complete(se.code().value()); + } catch (...) 
{ + c->set_complete(-EIO); + } + }); +} + +void schedule_aio_flush(crimson_rbd_image* img, crimson_rbd_completion* c) +{ + (void)img; + c->set_complete(0); // No client-side cache; no-op +} + +void schedule_aio_write_zeroes(crimson_rbd_image* img, uint64_t off, size_t len, + crimson_rbd_completion* c) +{ + c->complete.store(false); + if (img->image->is_read_only()) { + c->set_complete(-EACCES); + return; + } + if (len == 0) { + c->set_complete(0); + return; + } + if (off + len > img->image->get_size()) { + c->set_complete(-EINVAL); + return; + } + + const auto& image_id = img->image->get_image_id(); + auto& ioctx = img->image->get_ioctx(); + uint8_t order = img->image->get_order(); + + auto extents = extent_to_object_extents(off, len, order); + std::vector> futures; + for (const auto& ex : extents) { + std::string oid = data_object_name(image_id, ex.object_no); + futures.push_back(ioctx.write_zeroes(oid, ex.object_off, ex.length)); + } + + (void)seastar::when_all_succeed(futures.begin(), futures.end()) + .then([c]() { c->set_complete(0); }) + .handle_exception([c](std::exception_ptr e) { + try { + std::rethrow_exception(e); + } catch (const std::system_error& se) { + c->set_complete(se.code().value()); + } catch (...) { + c->set_complete(-EIO); + } + }); +} + +/// Copy len bytes from iovecs starting at byte offset start into bufferlist. 
+static ceph::bufferlist iovec_to_bufferlist( + const struct iovec* iov, int iovcnt, size_t start, size_t len) +{ + ceph::bufferlist bl; + size_t skip = start; + size_t remaining = len; + for (int i = 0; i < iovcnt && remaining > 0; ++i) { + size_t iov_len = iov[i].iov_len; + if (skip >= iov_len) { + skip -= iov_len; + continue; + } + size_t src_off = skip; + skip = 0; + size_t to_copy = std::min(iov_len - src_off, remaining); + bl.append(static_cast(iov[i].iov_base) + src_off, + static_cast(to_copy)); + remaining -= to_copy; + } + return bl; +} + +void schedule_aio_compare_and_writev(crimson_rbd_image* img, uint64_t off, + const struct iovec* cmp_iov, int cmp_iovcnt, + const struct iovec* iov, int iovcnt, + crimson_rbd_completion* c, uint64_t* mismatch_off) +{ + c->complete.store(false); + if (img->image->is_read_only()) { + c->set_complete(-EACCES); + return; + } + size_t cmp_len = 0, write_len = 0; + for (int i = 0; i < cmp_iovcnt; ++i) cmp_len += cmp_iov[i].iov_len; + for (int i = 0; i < iovcnt; ++i) write_len += iov[i].iov_len; + if (cmp_len != write_len || cmp_len == 0) { + c->set_complete(-EINVAL); + return; + } + size_t len = cmp_len; + if (off + len > img->image->get_size()) { + c->set_complete(-EINVAL); + return; + } + + const auto& image_id = img->image->get_image_id(); + auto& ioctx = img->image->get_ioctx(); + uint8_t order = img->image->get_order(); + auto extents = extent_to_object_extents(off, len, order); + + std::vector> futures; + size_t image_off = 0; + for (const auto& ex : extents) { + ceph::bufferlist cmp_bl = iovec_to_bufferlist( + cmp_iov, cmp_iovcnt, image_off, ex.length); + ceph::bufferlist write_bl = iovec_to_bufferlist( + iov, iovcnt, image_off, ex.length); + std::string oid = data_object_name(image_id, ex.object_no); + futures.push_back(ioctx.compare_and_write(oid, ex.object_off, + std::move(cmp_bl), ex.object_off, std::move(write_bl))); + image_off += ex.length; + } + + (void)seastar::when_all_succeed(futures.begin(), futures.end()) + 
.then([c]() { c->set_complete(0); }) + .handle_exception([c, mismatch_off](std::exception_ptr e) { + try { + std::rethrow_exception(e); + } catch (const std::system_error& se) { + int err = se.code().value(); + if (err == EILSEQ && mismatch_off) *mismatch_off = 0; + c->set_complete(err > 0 ? -err : err); + } catch (...) { + c->set_complete(-EIO); + } + }); +} + +} // namespace +} // namespace crimson::rbd diff --git a/src/crimson/rbd/api/metadata.cc b/src/crimson/rbd/api/metadata.cc new file mode 100644 index 00000000000..f6006b43cc7 --- /dev/null +++ b/src/crimson/rbd/api/metadata.cc @@ -0,0 +1,98 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// +// Phase 4: rbd_metadata_get, rbd_metadata_set, rbd_resize +// + +#include "crimson/rbd/api/crimson_rbd_impl.h" + +#include +#include + +#include "crimson/rbd/image_header.h" +#include "crimson/rbd/utils.h" +#include "rbd/rbd_crimson.h" + +extern "C" { + +int rbd_metadata_get(rbd_image_t image, const char* key, char* value, size_t* val_len) +{ + if (!image || !key || !val_len) return -EINVAL; + auto* img = static_cast(image); + if (!img || !img->image) return -EINVAL; + + try { + std::string header_oid = crimson::rbd::header_name(img->image->get_image_id()); + auto val = crimson::rbd::metadata_get( + img->image->get_ioctx(), header_oid, key).get(); + if (*val_len < val.size() + 1) { + *val_len = val.size() + 1; + return -ERANGE; + } + if (value) { + memcpy(value, val.c_str(), val.size() + 1); + } + return 0; + } catch (const std::system_error& e) { + return e.code().value() > 0 ? -e.code().value() : e.code().value(); + } catch (...) 
{ + return -EIO; + } +} + +int rbd_metadata_set(rbd_image_t image, const char* key, const char* value) +{ + if (!image || !key || !value) return -EINVAL; + auto* img = static_cast(image); + if (!img || !img->image) return -EINVAL; + if (img->image->is_read_only()) return -EROFS; + + try { + std::string header_oid = crimson::rbd::header_name(img->image->get_image_id()); + crimson::rbd::metadata_set( + img->image->get_ioctx(), header_oid, key, value).get(); + return 0; + } catch (const std::system_error& e) { + return e.code().value() > 0 ? -e.code().value() : e.code().value(); + } catch (...) { + return -EIO; + } +} + +int rbd_resize(rbd_image_t image, uint64_t size) +{ + if (!image) return -EINVAL; + auto* img = static_cast(image); + if (!img || !img->image) return -EINVAL; + if (img->image->is_read_only()) return -EROFS; + + try { + std::string header_oid = crimson::rbd::header_name(img->image->get_image_id()); + crimson::rbd::set_size( + img->image->get_ioctx(), header_oid, size).get(); + img->image->set_size(size); + return 0; + } catch (const std::system_error& e) { + return e.code().value() > 0 ? -e.code().value() : e.code().value(); + } catch (...) { + return -EIO; + } +} + +int rbd_update_watch(rbd_image_t image, uint64_t* handle, + void (*callback)(void*), void* arg) +{ + (void)image; + (void)handle; + (void)callback; + (void)arg; + return -ENOSYS; // Phase 7: OSD watch not implemented +} + +int rbd_update_unwatch(rbd_image_t image, uint64_t handle) +{ + (void)image; + (void)handle; + return -ENOSYS; // Phase 7 +} + +} // extern "C" diff --git a/src/crimson/rbd/api/open.cc b/src/crimson/rbd/api/open.cc new file mode 100644 index 00000000000..24383a3f559 --- /dev/null +++ b/src/crimson/rbd/api/open.cc @@ -0,0 +1,179 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// +// C API implementations: rbd_open, rbd_close, rbd_stat, rbd_get_size +// and cluster/ioctx stubs (Phase 6 for full cluster bootstrap). 
+// + +#include "crimson/rbd/api/crimson_rbd_impl.h" +#include "crimson/rbd/api/crimson_rbd_internal.h" + +#include +#include +#include +#include + +#include "rbd/rbd_crimson.h" +#include "include/types.h" +#include "crimson/rbd/image_header.h" +#include "crimson/rbd/utils.h" + +extern "C" { + +using namespace crimson::rbd; + +// --- cluster (stub for Phase 1; Phase 6 will implement full bootstrap) --- + +int rbd_crimson_cluster_create(rbd_crimson_cluster_t* cluster) +{ + return crimson::rbd::create_cluster_impl(cluster); +} + +void rbd_crimson_cluster_destroy(rbd_crimson_cluster_t cluster) +{ + if (!cluster) return; + rbd_crimson_cluster_shutdown(cluster); + crimson::rbd::unregister_cluster_from_map(cluster); + crimson::rbd::destroy_cluster_impl(cluster); +} + +// rbd_crimson_cluster_connect, rbd_crimson_cluster_shutdown, +// rbd_crimson_cluster_wait_for_osdmap, rbd_crimson_ioctx_create: +// implemented in api/cluster.cc + +void rbd_crimson_ioctx_destroy(rbd_crimson_ioctx_t ioctx) +{ + delete static_cast(ioctx); +} + +// --- rbd_open / rbd_close / rbd_stat / rbd_get_size (Phase 1) --- + +static int do_open(crimson_rbd_ioctx* ioc, const char* name, + rbd_image_t* image, bool read_only) +{ + if (!ioc || !name || !image) return -EINVAL; + if (!name[0]) return -EINVAL; + + try { + auto img_id_fut = get_image_id(ioc->ioctx, name); + auto img_id = img_id_fut.get(); + if (img_id.empty()) return -ENOENT; + + std::string header_oid = header_name(img_id); + uint64_t snap_id = CEPH_NOSNAP; // Phase 1: no snapshots + + auto size_fut = crimson::rbd::get_size(ioc->ioctx, header_oid, snap_id); + auto [size, order] = size_fut.get(); + + auto feat_fut = crimson::rbd::get_features(ioc->ioctx, header_oid, read_only); + auto [features, incompatible] = feat_fut.get(); + (void)incompatible; + + auto img = std::make_unique( + ioc->ioctx, // copy; ioctx remains valid for future opens + std::move(img_id), + size, + order, + features, + read_only); + *image = new 
crimson_rbd_image(std::move(img)); + return 0; + } catch (const std::system_error& e) { + int err = e.code().value(); + return err > 0 ? -err : err; // C API uses negative errno on error + } catch (...) { + return -EIO; + } +} + +int rbd_open(rbd_crimson_ioctx_t ioctx, const char* name, + rbd_image_t* image, const char* snap_name) +{ + (void)snap_name; // Phase 1: no snapshots + return do_open(static_cast(ioctx), name, image, false); +} + +int rbd_open_read_only(rbd_crimson_ioctx_t ioctx, const char* name, + rbd_image_t* image, const char* snap_name) +{ + (void)snap_name; + return do_open(static_cast(ioctx), name, image, true); +} + +static int do_open_by_id(crimson_rbd_ioctx* ioc, const char* image_id, + rbd_image_t* image, bool read_only) +{ + if (!ioc || !image_id || !image || !image_id[0]) return -EINVAL; + std::string img_id(image_id); + + try { + std::string header_oid = header_name(img_id); + uint64_t snap_id = CEPH_NOSNAP; + auto size_fut = crimson::rbd::get_size(ioc->ioctx, header_oid, snap_id); + auto [size, order] = size_fut.get(); // throws if cls exec fails + auto feat_fut = crimson::rbd::get_features(ioc->ioctx, header_oid, read_only); + auto [features, incompatible] = feat_fut.get(); + (void)incompatible; + auto img = std::make_unique( + ioc->ioctx, std::move(img_id), size, order, features, read_only); + *image = new crimson_rbd_image(std::move(img)); + return 0; + } catch (const std::system_error& e) { + int err = e.code().value(); + return err > 0 ? -err : err; + } catch (...) 
{ + return -EIO; + } +} + +int rbd_close(rbd_image_t image) +{ + if (!image) return -EINVAL; + delete static_cast(image); + return 0; +} + +int rbd_stat(rbd_image_t image, rbd_image_info_t* info, size_t infosize) +{ + if (!image || !info) return -EINVAL; + auto* img = static_cast(image); + if (!img->image) return -EINVAL; + + if (infosize < sizeof(rbd_image_info_t)) return -ERANGE; + + std::memset(info, 0, sizeof(*info)); + info->size = img->image->get_size(); + uint64_t obj_size = 1ULL << img->image->get_order(); + info->obj_size = obj_size; + info->num_objs = (img->image->get_size() + obj_size - 1) / obj_size; + info->order = static_cast(img->image->get_order()); + return 0; +} + +int rbd_get_size(rbd_image_t image, uint64_t* size) +{ + if (!image || !size) return -EINVAL; + auto* img = static_cast(image); + if (!img->image) return -EINVAL; + *size = img->image->get_size(); + return 0; +} + +} // extern "C" + +// --- Internal C++ API for integration tests (outside extern "C") --- + +namespace crimson::rbd { + +rbd_crimson_ioctx_t ioctx_create_from_iocontext(crimson::client::IoCtx ioctx) +{ + return new crimson_rbd_ioctx(std::move(ioctx)); +} + +int rbd_open_by_id(rbd_crimson_ioctx_t ioctx, const char* image_id, + rbd_image_t* image) +{ + return do_open_by_id(static_cast(ioctx), + image_id, image, false); +} + +} // namespace crimson::rbd diff --git a/src/crimson/rbd/cluster.cc b/src/crimson/rbd/cluster.cc new file mode 100644 index 00000000000..0e226d7a79d --- /dev/null +++ b/src/crimson/rbd/cluster.cc @@ -0,0 +1,45 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- + +#include "crimson/rbd/cluster.h" + +#include "crimson/client/rados_client.h" +#include "crimson/common/log.h" +#include "crimson/mon/MonClient.h" +#include "crimson/net/Messenger.h" + +namespace crimson::rbd { + +namespace { +seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_client); +} +} + +Cluster::Cluster(crimson::net::Messenger& msgr, + 
                 crimson::mon::Client& monc)
  : msgr(msgr), monc(monc)
{
  // NOTE(review): template argument restored from the mangled capture —
  // RadosClient is the only (msgr, monc)-constructed type in scope; verify.
  rados = std::make_unique<crimson::client::RadosClient>(msgr, monc);
}

Cluster::~Cluster() = default;

// Connect the underlying RadosClient (monmap/osdmap bootstrap happens there).
seastar::future<> Cluster::connect()
{
  logger().debug("Cluster::connect");
  return rados->connect();
}

// Create an IoCtx bound to `pool_name`; delegates pool resolution to rados.
seastar::future<crimson::client::IoCtx> Cluster::create_ioctx(
  std::string_view pool_name)
{
  return rados->create_ioctx(pool_name);
}

seastar::future<> Cluster::shutdown()
{
  logger().debug("Cluster::shutdown");
  return rados->shutdown();
}

} // namespace crimson::rbd
diff --git a/src/crimson/rbd/cluster.h b/src/crimson/rbd/cluster.h
new file mode 100644
index 00000000000..e90101be521
--- /dev/null
+++ b/src/crimson/rbd/cluster.h
@@ -0,0 +1,45 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-

#ifndef CEPH_CRIMSON_RBD_CLUSTER_H
#define CEPH_CRIMSON_RBD_CLUSTER_H

// NOTE(review): stdlib/seastar include names were stripped in the capture;
// restored from usage (unique_ptr, string_view, seastar::future) — verify.
#include <memory>
#include <string_view>

#include <seastar/core/future.hh>

#include "crimson/client/io_context.h"
#include "crimson/client/rados_client.h"

namespace crimson::mon {
class Client;
}
namespace crimson::net {
class Messenger;
}

namespace crimson::rbd {

/**
 * Cluster - wraps Crimson RadosClient.
 * Caller owns Messenger and MonClient; Cluster holds RadosClient.
 */
class Cluster {
public:
  Cluster(crimson::net::Messenger& msgr,
          crimson::mon::Client& monc);
  ~Cluster();

  seastar::future<> connect();
  seastar::future<crimson::client::IoCtx> create_ioctx(std::string_view pool_name);
  seastar::future<> shutdown();

private:
  crimson::net::Messenger& msgr;  // caller-owned; must outlive Cluster
  crimson::mon::Client& monc;     // caller-owned; must outlive Cluster
  std::unique_ptr<crimson::client::RadosClient> rados;
};

} // namespace crimson::rbd

#endif
diff --git a/src/crimson/rbd/completion.cc b/src/crimson/rbd/completion.cc
new file mode 100644
index 00000000000..c85310f0f35
--- /dev/null
+++ b/src/crimson/rbd/completion.cc
@@ -0,0 +1,9 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-

#include "crimson/rbd/completion.h"

/*
 * Phase 1: Completion is a minimal stub.
 * Phase 2 will add rbd_aio_create_completion, rbd_aio_release,
 * rbd_aio_get_arg, rbd_aio_get_return_value for async I/O.
 */
diff --git a/src/crimson/rbd/completion.h b/src/crimson/rbd/completion.h
new file mode 100644
index 00000000000..f00de08a1b8
--- /dev/null
+++ b/src/crimson/rbd/completion.h
@@ -0,0 +1,32 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-

#ifndef CEPH_CRIMSON_RBD_COMPLETION_H
#define CEPH_CRIMSON_RBD_COMPLETION_H

#include "rbd/rbd_crimson.h"

// NOTE(review): include names stripped in the capture; restored from usage
// (std::atomic, ssize_t) — verify.
#include <atomic>
#include <sys/types.h>

/**
 * Internal completion structure. rbd_completion_t (void*) points to this.
 * Used by rbd_aio_* for async I/O callbacks.
 */
struct crimson_rbd_completion {
  void* callback_arg = nullptr;          // user argument for complete_cb
  rbd_callback_t complete_cb = nullptr;  // optional user completion callback
  // NOTE(review): atomic element types were stripped; rval carries the
  // operation return value, complete the done flag — verify exact types.
  std::atomic<ssize_t> rval{0};
  std::atomic<bool> complete{false};

  crimson_rbd_completion() = default;

  // Publish the result, mark done, then invoke the user callback (which may
  // release this completion — do not touch members afterwards).
  void set_complete(int ret) {
    rval.store(ret);
    complete.store(true);
    if (complete_cb) {
      complete_cb(static_cast<rbd_completion_t>(this), callback_arg);
    }
  }
};

#endif
diff --git a/src/crimson/rbd/image.cc b/src/crimson/rbd/image.cc
new file mode 100644
index 00000000000..9bf9d9523ef
--- /dev/null
+++ b/src/crimson/rbd/image.cc
@@ -0,0 +1,23 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-

#include "crimson/rbd/image.h"

#include "crimson/client/io_context.h"

namespace crimson::rbd {

// Plain value-holding constructor; all header state is read by the caller
// (api/open.cc) before an Image is built.
Image::Image(crimson::client::IoCtx ioctx,
             std::string image_id,
             uint64_t size,
             uint8_t order,
             uint64_t features,
             bool read_only)
  : ioctx(std::move(ioctx)),
    image_id(std::move(image_id)),
    size(size),
    order(order),
    features(features),
    read_only(read_only)
{}

} // namespace crimson::rbd
diff --git a/src/crimson/rbd/image.h b/src/crimson/rbd/image.h
new file mode 100644
index 00000000000..72fcdd880bf
--- /dev/null
+++ b/src/crimson/rbd/image.h
@@ -0,0 +1,46 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-

#ifndef CEPH_CRIMSON_RBD_IMAGE_H
#define
CEPH_CRIMSON_RBD_IMAGE_H

// NOTE(review): stdlib include names stripped in the capture; restored from
// usage (uint64_t/uint8_t, std::string) — verify against the original commit.
#include <cstdint>
#include <memory>
#include <string>

#include "crimson/client/io_context.h"

namespace crimson::rbd {

/**
 * Image - RBD image handle.
 * Holds image_id, size, order, features. Phase 1: read-only, no I/O.
 */
class Image {
public:
  Image(crimson::client::IoCtx ioctx,
        std::string image_id,
        uint64_t size,
        uint8_t order,
        uint64_t features,
        bool read_only);

  const std::string& get_image_id() const { return image_id; }
  uint64_t get_size() const { return size; }
  void set_size(uint64_t s) { size = s; } // Phase 4: after resize
  uint8_t get_order() const { return order; }
  uint64_t get_features() const { return features; }
  bool is_read_only() const { return read_only; }
  crimson::client::IoCtx& get_ioctx() { return ioctx; }

private:
  crimson::client::IoCtx ioctx;  // owned copy; keeps pool binding alive
  std::string image_id;
  uint64_t size;                 // cached; refreshed by rbd_resize
  uint8_t order;                 // object size is 1 << order
  uint64_t features;
  bool read_only;
};

} // namespace crimson::rbd

#endif
diff --git a/src/crimson/rbd/image_header.cc b/src/crimson/rbd/image_header.cc
new file mode 100644
index 00000000000..248ccc9e709
--- /dev/null
+++ b/src/crimson/rbd/image_header.cc
@@ -0,0 +1,157 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-

#include "crimson/rbd/image_header.h"

// NOTE(review): one stdlib include name was stripped; <utility> restored
// (std::pair / std::make_pair below) — verify.
#include <utility>

#include "crimson/client/io_context.h"
#include "crimson/rbd/utils.h"
#include "include/rbd_types.h"
#include "common/snap_types.h"
#include "include/encoding.h"

namespace crimson::rbd {

// cls rbd.get_size: request encodes the snap id; reply encodes
// (order: u8, size: u64) in that order.
seastar::future<std::pair<uint64_t, uint8_t>> get_size(
  crimson::client::IoCtx& ioctx,
  const std::string& header_oid,
  uint64_t snap_id)
{
  ceph::bufferlist in;
  ceph::encode(static_cast<uint64_t>(snap_id), in);

  return ioctx.exec(header_oid, "rbd", "get_size", std::move(in))
    .then([](ceph::bufferlist out) {
      uint8_t order = 0;
      uint64_t size = 0;
      try {
        auto p = out.cbegin();
        ceph::decode(order, p);  // reply order matters: order first
        ceph::decode(size, p);
      } catch (const ceph::buffer::error&) {
        // Truncated/garbled reply -> surface as EIO, not a decode crash.
        return seastar::make_exception_future<std::pair<uint64_t, uint8_t>>(
          std::system_error(EIO, std::generic_category(), "get_size decode"));
      }
      return seastar::make_ready_future<std::pair<uint64_t, uint8_t>>(
        std::make_pair(size, order));
    });
}

// cls rbd.get_features: request encodes (snap_id: u64, read_only: bool);
// reply encodes (features: u64, incompatible: u64).
seastar::future<std::pair<uint64_t, uint64_t>> get_features(
  crimson::client::IoCtx& ioctx,
  const std::string& header_oid,
  bool read_only)
{
  ceph::bufferlist in;
  ceph::encode(static_cast<uint64_t>(CEPH_NOSNAP), in);
  ceph::encode(read_only, in);

  return ioctx.exec(header_oid, "rbd", "get_features", std::move(in))
    .then([](ceph::bufferlist out) {
      uint64_t features = 0;
      uint64_t incompatible = 0;
      try {
        auto p = out.cbegin();
        ceph::decode(features, p);
        ceph::decode(incompatible, p);
      } catch (const ceph::buffer::error&) {
        return seastar::make_exception_future<std::pair<uint64_t, uint64_t>>(
          std::system_error(EIO, std::generic_category(), "get_features decode"));
      }
      return seastar::make_ready_future<std::pair<uint64_t, uint64_t>>(
        std::make_pair(features, incompatible));
    });
}

namespace {
// Decode a cls reply holding a single string image id.  A decode failure is
// deliberately mapped to ENOENT: callers treat it as "name not registered".
seastar::future<std::string> decode_image_id(ceph::bufferlist out) {
  std::string id;
  try {
    auto p = out.cbegin();
    ceph::decode(id, p);
  } catch (const ceph::buffer::error&) {
    return seastar::make_exception_future<std::string>(
      std::system_error(ENOENT, std::generic_category(), "get_image_id"));
  }
  return seastar::make_ready_future<std::string>(std::move(id));
}
} // namespace

seastar::future<std::string> get_image_id(
  crimson::client::IoCtx& ioctx,
  const std::string& image_name)
{
  // Prefer dir_get_id on rbd_directory (same as librbd). Fall back to
  // get_id on rbd_id.<name> when dir_get_id returns ENOENT (e.g. minimal
  // vstart setups or alternate layout).
  // Lifetime: ioctx is captured by reference into the continuation; callers
  // must keep it alive until the returned future resolves.
  ceph::bufferlist in;
  ceph::encode(image_name, in);
  return ioctx.exec(RBD_DIRECTORY, "rbd", "dir_get_id", std::move(in))
    .then([](ceph::bufferlist out) { return decode_image_id(std::move(out)); })
    .handle_exception([&ioctx, image_name](std::exception_ptr e) {
      try {
        std::rethrow_exception(e);
      } catch (const std::system_error& err) {
        if (err.code().value() != static_cast<int>(ENOENT)) {
          // Only ENOENT triggers the fallback path below.
          return seastar::make_exception_future<std::string>(e);
        }
      } catch (...) {
        return seastar::make_exception_future<std::string>(e);
      }
      const std::string id_oid = std::string(RBD_ID_PREFIX) + image_name;
      ceph::bufferlist fallback_in;  // get_id takes an empty request
      return ioctx.exec(id_oid, "rbd", "get_id", std::move(fallback_in))
        .then([](ceph::bufferlist out) { return decode_image_id(std::move(out)); });
    });
}

// cls rbd.metadata_get: request encodes the key; reply encodes the value.
seastar::future<std::string> metadata_get(
  crimson::client::IoCtx& ioctx,
  const std::string& header_oid,
  const std::string& key)
{
  ceph::bufferlist in;
  ceph::encode(key, in);
  return ioctx.exec(header_oid, "rbd", "metadata_get", std::move(in))
    .then([](ceph::bufferlist out) {
      std::string value;
      try {
        auto p = out.cbegin();
        ceph::decode(value, p);
      } catch (const ceph::buffer::error&) {
        // Missing/garbled value reads as "no such key".
        return seastar::make_exception_future<std::string>(
          std::system_error(ENOENT, std::generic_category(), "metadata_get"));
      }
      return seastar::make_ready_future<std::string>(std::move(value));
    });
}

// cls rbd.metadata_set: request encodes a map<key, encoded-value-bl>.
seastar::future<> metadata_set(
  crimson::client::IoCtx& ioctx,
  const std::string& header_oid,
  const std::string& key,
  const std::string& value)
{
  std::map<std::string, ceph::bufferlist> data;
  ceph::bufferlist bl;
  ceph::encode(value, bl);  // value itself is encoded inside its bufferlist
  data[key] = std::move(bl);

  ceph::bufferlist in;
  ceph::encode(data, in);
  return ioctx.exec(header_oid, "rbd", "metadata_set", std::move(in))
    .then([](ceph::bufferlist) { return seastar::make_ready_future<>(); });
}

// cls rbd.set_size: request encodes the new size (u64).
seastar::future<> set_size(
  crimson::client::IoCtx& ioctx,
  const std::string& header_oid,
  uint64_t size)
{
  ceph::bufferlist in;
  ceph::encode(size, in);
  return ioctx.exec(header_oid, "rbd", "set_size", std::move(in))
    .then([](ceph::bufferlist) { return seastar::make_ready_future<>(); });
}

} // namespace crimson::rbd
diff --git a/src/crimson/rbd/image_header.h b/src/crimson/rbd/image_header.h
new file mode 100644
index 00000000000..51a15716fa4
--- /dev/null
+++ b/src/crimson/rbd/image_header.h
@@ -0,0 +1,55 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-

#ifndef CEPH_CRIMSON_RBD_IMAGE_HEADER_H
#define CEPH_CRIMSON_RBD_IMAGE_HEADER_H

// NOTE(review): include names stripped in the capture; restored from usage
// (std::string, std::pair, seastar::future) — verify.
#include <string>
#include <utility>

#include <seastar/core/future.hh>

namespace crimson::client {
class IoCtx;
}

namespace crimson::rbd {

/// Read RBD header via cls/rbd. Uses IoCtx::exec for get_size, get_features.

/// Returns (size, order).
seastar::future<std::pair<uint64_t, uint8_t>> get_size(
  crimson::client::IoCtx& ioctx,
  const std::string& header_oid,
  uint64_t snap_id);

/// Returns (features, incompatible).
seastar::future<std::pair<uint64_t, uint64_t>> get_features(
  crimson::client::IoCtx& ioctx,
  const std::string& header_oid,
  bool read_only);

/// Resolve image name to image_id via rbd_directory (cls dir_get_id).
seastar::future<std::string> get_image_id(
  crimson::client::IoCtx& ioctx,
  const std::string& image_name);

/// Read metadata key from header object (cls metadata_get).
seastar::future<std::string> metadata_get(
  crimson::client::IoCtx& ioctx,
  const std::string& header_oid,
  const std::string& key);

/// Write metadata key-value to header object (cls metadata_set).
seastar::future<> metadata_set(
  crimson::client::IoCtx& ioctx,
  const std::string& header_oid,
  const std::string& key,
  const std::string& value);

/// Update image size in header (cls set_size).
+seastar::future<> set_size( + crimson::client::IoCtx& ioctx, + const std::string& header_oid, + uint64_t size); + +} // namespace crimson::rbd + +#endif diff --git a/src/crimson/rbd/utils.cc b/src/crimson/rbd/utils.cc new file mode 100644 index 00000000000..2f8c0e87e69 --- /dev/null +++ b/src/crimson/rbd/utils.cc @@ -0,0 +1,50 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- + +#include "crimson/rbd/utils.h" + +#include "include/rbd_types.h" +#include +#include + +namespace crimson::rbd { + +std::string header_name(const std::string& image_id) +{ + return std::string(RBD_HEADER_PREFIX) + image_id; +} + +std::string data_object_name(const std::string& image_id, uint64_t object_no) +{ + char buf[RBD_MAX_OBJ_NAME_SIZE]; + int n = snprintf(buf, sizeof(buf), "%s%s.%016llx", + RBD_DATA_PREFIX, image_id.c_str(), + (unsigned long long)object_no); + if (n < 0 || static_cast(n) >= sizeof(buf)) { + return {}; + } + return std::string(buf); +} + +std::vector extent_to_object_extents( + uint64_t offset, uint64_t length, uint8_t order) +{ + std::vector extents; + if (length == 0) return extents; + + const uint64_t object_size = 1ULL << order; + uint64_t off = offset; + uint64_t remaining = length; + + while (remaining > 0) { + uint64_t object_no = off / object_size; + uint64_t object_off = off % object_size; + uint64_t chunk = std::min(remaining, object_size - object_off); + + extents.push_back({object_no, object_off, chunk}); + off += chunk; + remaining -= chunk; + } + return extents; +} + +} // namespace crimson::rbd diff --git a/src/crimson/rbd/utils.h b/src/crimson/rbd/utils.h new file mode 100644 index 00000000000..1edf9f9cb3d --- /dev/null +++ b/src/crimson/rbd/utils.h @@ -0,0 +1,32 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- + +#ifndef CEPH_CRIMSON_RBD_UTILS_H +#define CEPH_CRIMSON_RBD_UTILS_H + +#include +#include +#include + +namespace crimson::rbd { + +/// RBD header object name: rbd_header. 
+std::string header_name(const std::string& image_id); + +/// RBD data object name: rbd_data.. +std::string data_object_name(const std::string& image_id, uint64_t object_no); + +/// Object extent: (object_no, object_off, length) for block-to-object mapping. +struct ObjectExtent { + uint64_t object_no; + uint64_t object_off; + uint64_t length; +}; + +/// Map image (offset, length) to object extents. Simple layout: object_size = 1< extent_to_object_extents( + uint64_t offset, uint64_t length, uint8_t order); + +} // namespace crimson::rbd + +#endif diff --git a/src/crimson/tools/CMakeLists.txt b/src/crimson/tools/CMakeLists.txt index ae7b493d42f..dcfba0f7d3f 100644 --- a/src/crimson/tools/CMakeLists.txt +++ b/src/crimson/tools/CMakeLists.txt @@ -32,6 +32,16 @@ target_link_libraries(crimson-rados-demo ${FMT_LIB}) install(TARGETS crimson-rados-demo DESTINATION bin) +add_executable(crimson-rbd-demo rbd_demo.cc) +target_link_libraries(crimson-rbd-demo + crimson-main-config-bootstrap + crimson + librbd_crimson + crimson-common + legacy-option-headers + ${FMT_LIB}) +install(TARGETS crimson-rbd-demo DESTINATION bin) + add_executable(perf-async-msgr perf_async_msgr.cc) target_link_libraries(perf-async-msgr ceph-common global ${ALLOC_LIBS}) diff --git a/src/crimson/tools/rbd_demo.cc b/src/crimson/tools/rbd_demo.cc new file mode 100644 index 00000000000..620ea3245dd --- /dev/null +++ b/src/crimson/tools/rbd_demo.cc @@ -0,0 +1,348 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 sts=2 expandtab +// +// Integration test for librbd_crimson: connect, rbd_open, rbd_stat, +// rbd_aio_read/write, rbd_close, rbd_metadata_get/set, rbd_resize. +// Two bootstrap paths: C++ (default) or C API (--use-c-api). 
//
// Usage: crimson-rbd-demo -c ceph.conf -n client.admin --pool rbd --image testimg
//        crimson-rbd-demo --use-c-api -c ceph.conf -n client.admin --pool rbd --image testimg
//
// Prerequisites: Create RBD image first:
//   rbd create -s 1M testimg   # or: rbd create -p rbd -s 1M testimg
//

// NOTE(review): the captured patch stripped the <...> of the stdlib/boost/
// seastar includes and all template arguments below; both were restored from
// usage — verify against the original commit before merging.
#include <cstring>
#include <string>

#include <boost/program_options.hpp>
#include <seastar/core/app-template.hh>
#include <seastar/core/thread.hh>
#include <seastar/util/defer.hh>

#include "auth/KeyRing.h"
#include "common/entity_name.h"
#include "crimson/client/io_context.h"
#include "crimson/client/rados_client.h"
#include "crimson/osdc/objecter.h"
#include "crimson/common/config_proxy.h"
#include "crimson/common/fatal_signal.h"
#include "crimson/common/log.h"
#include "crimson/common/perf_counters_collection.h"
#include "crimson/mon/MonClient.h"
#include "crimson/net/Messenger.h"
#include "crimson/osd/main_config_bootstrap_helpers.h"
#include "crimson/rbd/api/crimson_rbd_internal.h"
#include "msg/msg_types.h"
#include "rbd/rbd_crimson.h"

namespace bpo = boost::program_options;

static seastar::logger& logger() {
  return crimson::get_logger(ceph_subsys_client);
}

int main(int argc, const char* argv[])
{
  // Split ceph-style args (-c, -n, --cluster) out before Seastar parses argv.
  auto early_result = crimson::osd::get_early_config_client(argc, argv);
  if (!early_result.has_value()) {
    std::cerr << "get_early_config_client failed: " << early_result.error()
              << std::endl;
    return early_result.error();
  }
  auto& early_config = early_result.value();

  seastar::app_template::config app_cfg;
  app_cfg.name = "crimson-rbd-demo";
  app_cfg.auto_handle_sigint_sigterm = false;  // FatalSignal owns signals
  seastar::app_template app(std::move(app_cfg));
  app.add_options()
    ("pool", bpo::value<std::string>()->default_value("rbd"),
     "pool name")
    ("image", bpo::value<std::string>()->default_value("testimg"),
     "RBD image name (create with: rbd create -s 1M <image>)")
    ("image-id", bpo::value<std::string>(),
     "RBD image id (optional; skips name lookup when get_image_id fails)")
    ("use-c-api", "use C API bootstrap (rbd_crimson_cluster_*) instead of C++")
    ("debug", "enable debug logging");

  try {
    return app.run(
      early_config.get_early_args().size(),
      const_cast<char**>(early_config.get_early_args().data()),
      [&] {
        auto& config = app.configuration();
        auto config_proxy_args = early_config.ceph_args;
        // seastar::async provides a stackful fiber so .get() can block.
        return seastar::async([config_proxy_args, &config, &early_config] {
          try {
            FatalSignal fatal_signal;
            if (config.count("debug")) {
              seastar::global_logger_registry().set_all_loggers_level(
                seastar::log_level::debug);
            }

            const auto pool_name = config["pool"].as<std::string>();
            const auto image_name = config["image"].as<std::string>();
            const bool use_c_api = config.count("use-c-api") != 0;
            const bool has_image_id = config.count("image-id") != 0;
            const auto image_id_opt = has_image_id
              ? std::optional<std::string>(config["image-id"].as<std::string>())
              : std::optional<std::string>();

            // Lambda to run RBD operations (shared by C++ and C API paths).
            // cleanup_ioctx lets the caller tear down the ioctx before this
            // lambda returns (the C API path destroys it early).
            auto run_rbd_ops = [&](rbd_crimson_ioctx_t rbd_ioctx,
                                   std::function<void()> cleanup_ioctx) {
              rbd_image_t image = nullptr;
              int r;
              if (image_id_opt) {
                r = crimson::rbd::rbd_open_by_id(rbd_ioctx, image_id_opt->c_str(),
                                                 &image);
                if (r < 0) {
                  logger().error("rbd_open_by_id({}) failed: {}",
                                 *image_id_opt, r);
                  return EXIT_FAILURE;
                }
              } else {
                r = rbd_open(rbd_ioctx, image_name.c_str(), &image, nullptr);
                if (r < 0) {
                  logger().error("rbd_open({}) failed: {}", image_name, r);
                  return EXIT_FAILURE;
                }
              }
              auto close_image = seastar::defer([&] {
                rbd_close(image);
              });

              rbd_image_info_t info;
              r = rbd_stat(image, &info, sizeof(info));
              if (r < 0) {
                logger().error("rbd_stat failed: {}", r);
                return EXIT_FAILURE;
              }
              logger().info("rbd_stat: size={} order={} num_objs={}",
                            info.size, info.order, info.num_objs);

              uint64_t size = 0;
              r = rbd_get_size(image, &size);
              if (r < 0) {
                logger().error("rbd_get_size failed: {}", r);
                return EXIT_FAILURE;
              }
              logger().info("rbd_get_size: {}", size);

              // Phase 4: metadata_get, metadata_set, resize
              char meta_val[256];
              size_t meta_len = sizeof(meta_val);
              r = rbd_metadata_get(image, "test_key", meta_val, &meta_len);
              if (r == 0) {
                logger().info("rbd_metadata_get test_key: {}", meta_val);
              } else if (r != -ENOENT && r != -ERANGE) {
                logger().warn("rbd_metadata_get test_key: {} (not fatal)", r);
              }
              r = rbd_metadata_set(image, "test_key", "test_value");
              if (r < 0) {
                logger().warn("rbd_metadata_set failed: {} (not fatal)", r);
              } else {
                meta_len = sizeof(meta_val);
                r = rbd_metadata_get(image, "test_key", meta_val, &meta_len);
                if (r == 0) {
                  logger().info("rbd_metadata_get after set: {}", meta_val);
                }
              }
              uint64_t orig_size = size;
              if (size >= 8192) {
                r = rbd_resize(image, size); // no-op same size
                if (r < 0) logger().warn("rbd_resize same: {} (not fatal)", r);
              }
              r = rbd_resize(image, size + 4096);
              if (r < 0) {
                logger().warn("rbd_resize +4096 failed: {} (not fatal)", r);
              } else {
                r = rbd_get_size(image, &size);
                if (r == 0) logger().info("rbd_resize ok, new size: {}", size);
                r = rbd_resize(image, orig_size); // shrink back
                if (r < 0) logger().warn("rbd_resize shrink: {} (not fatal)", r);
              }

              // Async write and read (completion is reused: each rbd_aio_*
              // call re-arms it before scheduling).
              constexpr size_t io_len = 4096;
              std::vector<char> write_buf(io_len, 'x');
              std::vector<char> read_buf(io_len, 0);

              rbd_completion_t comp = nullptr;
              r = rbd_aio_create_completion(nullptr,
                                            [](rbd_completion_t, void*) {}, &comp);
              if (r < 0) {
                logger().error("rbd_aio_create_completion failed: {}", r);
                return EXIT_FAILURE;
              }
              auto release_comp = seastar::defer([&] { rbd_aio_release(comp); });

              r = rbd_aio_write(image, 0, io_len, write_buf.data(), comp);
              if (r < 0) {
                logger().error("rbd_aio_write failed: {}", r);
                return EXIT_FAILURE;
              }
              rbd_aio_wait_for_complete(comp);
              ssize_t ret = rbd_aio_get_return_value(comp);
              if (ret < 0) {
                logger().error("rbd_aio_write completion: {}",
                               static_cast<int64_t>(ret));
                return EXIT_FAILURE;
              }
              logger().info("rbd_aio_write 0-{} ok", io_len);

              r = rbd_aio_read(image, 0, io_len, read_buf.data(), comp);
              if (r < 0) {
                logger().error("rbd_aio_read failed: {}", r);
                return EXIT_FAILURE;
              }
              rbd_aio_wait_for_complete(comp);
              ret = rbd_aio_get_return_value(comp);
              if (ret < 0) {
                logger().error("rbd_aio_read completion: {}",
                               static_cast<int64_t>(ret));
                return EXIT_FAILURE;
              }
              logger().info("rbd_aio_read 0-{} ok, ret={}", io_len, ret);

              if (memcmp(write_buf.data(), read_buf.data(), io_len) != 0) {
                logger().error("read mismatch");
                return EXIT_FAILURE;
              }
              logger().info("read/write verify ok");
              cleanup_ioctx();
              return EXIT_SUCCESS;
            };

            if (use_c_api) {
              // C API bootstrap path (standalone; see --use-c-api)
              const char* config_opts_raw[] = {
                "config_file", early_config.conf_file_list.c_str(),
                "name", early_config.init_params.name.to_cstr(),
                "cluster", early_config.cluster_name.c_str(),
                "keyring", "keyring", // match C++ path; vstart uses keyring in build dir
                nullptr
              };
              // Drop key/value pairs whose value is empty.
              std::vector<const char*> config_opts;
              for (size_t i = 0; config_opts_raw[i]; i += 2) {
                if (config_opts_raw[i + 1] && strlen(config_opts_raw[i + 1]) > 0) {
                  config_opts.push_back(config_opts_raw[i]);
                  config_opts.push_back(config_opts_raw[i + 1]);
                }
              }
              if (config_opts.empty() || early_config.conf_file_list.empty()) {
                logger().error("C API path needs -c config_file and -n name");
                return EXIT_FAILURE;
              }

              rbd_crimson_cluster_t cluster = nullptr;
              int r = rbd_crimson_cluster_create(&cluster);
              if (r < 0 || !cluster) {
                logger().error("rbd_crimson_cluster_create failed: {}", r);
                return EXIT_FAILURE;
              }
              auto destroy_cluster = seastar::defer([&] {
                rbd_crimson_cluster_shutdown(cluster);
                rbd_crimson_cluster_destroy(cluster);
              });

              r = rbd_crimson_cluster_connect(cluster,
                                              config_opts.data(), config_opts.size());
              if (r < 0) {
                logger().error("rbd_crimson_cluster_connect failed: {}", r);
                return EXIT_FAILURE;
              }

              rbd_crimson_ioctx_t rbd_ioctx = nullptr;
              r = rbd_crimson_ioctx_create(cluster, pool_name.c_str(), &rbd_ioctx);
              if (r < 0 || !rbd_ioctx) {
                logger().error("rbd_crimson_ioctx_create failed: {}", r);
                return EXIT_FAILURE;
              }
              auto destroy_ioctx = seastar::defer([&] {
                rbd_crimson_ioctx_destroy(rbd_ioctx);
              });

              int result = run_rbd_ops(rbd_ioctx, [&] {
                // Early teardown: destroy now and disarm the deferred copy.
                rbd_crimson_ioctx_destroy(rbd_ioctx);
                destroy_ioctx.cancel();
              });
              if (result != EXIT_SUCCESS) return result;
              logger().info("crimson-rbd-demo completed successfully");
              return EXIT_SUCCESS;
            }

            // C++ bootstrap path.  Order matters: conf -> perf counters ->
            // config files/env/argv -> mon-backed config -> messenger -> monc.
            crimson::common::sharded_conf().start(
              early_config.init_params.name,
              early_config.cluster_name).get();
            crimson::common::local_conf().start().get();
            auto stop_conf = seastar::deferred_stop(
              crimson::common::sharded_conf());
            crimson::common::sharded_perf_coll().start().get();
            auto stop_perf = seastar::deferred_stop(
              crimson::common::sharded_perf_coll());

            crimson::common::local_conf().parse_config_files(
              early_config.conf_file_list).get();
            crimson::common::local_conf().parse_env().get();
            crimson::common::local_conf().parse_argv(
              config_proxy_args).get();

            crimson::osd::populate_config_from_mon().get();

            // Auth callback: the demo accepts whatever caps the mon grants.
            class DemoAuthHandler : public crimson::common::AuthHandler {
            public:
              void handle_authentication(const EntityName& name,
                                         const AuthCapsInfo& caps) override {}
            };
            auto auth_handler = std::make_unique<DemoAuthHandler>();
            auto msgr = crimson::net::Messenger::create(
              entity_name_t(early_config.init_params.name.get_type(), -1),
              "rbd_demo",
              crimson::osd::get_nonce(),
              true);
            crimson::mon::Client monc(*msgr, *auth_handler);
            msgr->set_auth_client(&monc);
            msgr->set_auth_server(&monc);

            crimson::client::RadosClient rados(*msgr, monc);
            crimson::net::dispatchers_t dispatchers;
            dispatchers.push_back(&monc);
            dispatchers.push_back(&rados.get_objecter());
            msgr->start(dispatchers).get();
            auto stop_msgr = seastar::defer([&] {
              msgr->stop();
              msgr->shutdown().get();
            });

            monc.start().get();
            auto stop_monc = seastar::defer([&] { monc.stop().get(); });

            // NOTE(review): cast type restored (stripped in capture); the
            // nonce is masked to a non-negative value — verify the target
            // type of set_client_incarnation.
            rados.get_objecter().set_client_incarnation(
              static_cast<int64_t>(crimson::osd::get_nonce() & 0x7fffffff));
            rados.connect().get();
            auto ioctx = rados.create_ioctx(pool_name).get();

            // Wrap IoCtx for librbd_crimson C API
            auto rbd_ioctx = crimson::rbd::ioctx_create_from_iocontext(
              std::move(ioctx));
            auto destroy_ioctx = seastar::defer(
              [&] { rbd_crimson_ioctx_destroy(rbd_ioctx); });

            int result = run_rbd_ops(rbd_ioctx, []{});
            if (result != EXIT_SUCCESS) return result;

            rados.shutdown().get();
            logger().info("crimson-rbd-demo completed successfully");
            return EXIT_SUCCESS;
          } catch (const std::exception& e) {
            logger().error("crimson-rbd-demo failed: {}", e.what());
            return EXIT_FAILURE;
          }
        });
      });
  } catch (const std::exception& e) {
    std::cerr << "FATAL: " << e.what() << std::endl;
    return EXIT_FAILURE;
  }
}
diff --git a/src/include/CMakeLists.txt b/src/include/CMakeLists.txt
index cb9c2fea8f8..0b6d9ae084c 100644
--- a/src/include/CMakeLists.txt
+++ b/src/include/CMakeLists.txt
@@ -30,6 +30,12 @@ if(WITH_RBD)
     DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rbd)
 endif()

+if(WITH_CRIMSON)
+  install(FILES
+    rbd/rbd_crimson.h
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rbd)
+endif()
+
 if(WITH_RADOSGW)
   install(FILES
     rados/librgw.h
diff --git a/src/include/rbd/rbd_crimson.h b/src/include/rbd/rbd_crimson.h
new file mode 100644
index 00000000000..b16b5fba234
--- /dev/null
+++ b/src/include/rbd/rbd_crimson.h
@@ -0,0 +1,140 @@
// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
// vim: ts=8 sw=2 sts=2 expandtab
//
// librbd_crimson C API - compatible with rbd/librbd.h
// Uses Crimson Objecter; no librados dependency.
+ +#ifndef CEPH_RBD_CRIMSON_H +#define CEPH_RBD_CRIMSON_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +#if defined(__linux__) +#include +#endif + +/* Opaque handles - same as librbd for ABI compatibility */ +typedef void *rbd_image_t; +typedef void *rbd_completion_t; +typedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg); + +/* Crimson-specific: cluster and ioctx (replace rados_* for now) */ +typedef void *rbd_crimson_cluster_t; +typedef void *rbd_crimson_ioctx_t; + +/* rbd_stat / rbd_image_info_t equivalent */ +typedef struct { + uint64_t size; + uint64_t obj_size; + uint64_t num_objs; + int order; + char block_name_prefix[24]; + int64_t parent_pool; + char parent_name[96]; +} rbd_image_info_t; + +int rbd_crimson_cluster_create(rbd_crimson_cluster_t *cluster); +void rbd_crimson_cluster_destroy(rbd_crimson_cluster_t cluster); +int rbd_crimson_cluster_connect(rbd_crimson_cluster_t cluster, + const char *const *config_options, + size_t num_config_options); +void rbd_crimson_cluster_shutdown(rbd_crimson_cluster_t cluster); + +/* Phase 6.2: Look up cluster by name (from connect with cluster config option). */ +int rbd_crimson_cluster_get(const char *cluster_name, + rbd_crimson_cluster_t *cluster); + +/* Get cluster addresses (nonce) for rbd_register_cluster. Caller frees *addrs. */ +int rbd_crimson_cluster_getaddrs(rbd_crimson_cluster_t cluster, char **addrs); + +/* Wait for OSDMap before issuing I/O. Optional; create_ioctx also waits. 
*/ +int rbd_crimson_cluster_wait_for_osdmap(rbd_crimson_cluster_t cluster); + +int rbd_crimson_ioctx_create(rbd_crimson_cluster_t cluster, + const char *pool_name, + rbd_crimson_ioctx_t *ioctx); +void rbd_crimson_ioctx_destroy(rbd_crimson_ioctx_t ioctx); + +int rbd_open(rbd_crimson_ioctx_t ioctx, const char *name, + rbd_image_t *image, const char *snap_name); +int rbd_open_read_only(rbd_crimson_ioctx_t ioctx, const char *name, + rbd_image_t *image, const char *snap_name); +int rbd_close(rbd_image_t image); + +int rbd_stat(rbd_image_t image, rbd_image_info_t *info, size_t infosize); +int rbd_get_size(rbd_image_t image, uint64_t *size); + +/* Phase 4: Metadata and resize */ +int rbd_metadata_get(rbd_image_t image, const char *key, char *value, size_t *val_len); +int rbd_metadata_set(rbd_image_t image, const char *key, const char *value); +int rbd_resize(rbd_image_t image, uint64_t size); + +/* Watch (Phase 7 — stubbed, returns -ENOSYS) */ +int rbd_update_watch(rbd_image_t image, uint64_t *handle, + void (*callback)(void *arg), void *arg); +int rbd_update_unwatch(rbd_image_t image, uint64_t handle); + +/* Async I/O (Phase 2) */ +int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len, char *buf, + rbd_completion_t c); +int rbd_aio_readv(rbd_image_t image, const struct iovec *iov, int iovcnt, + uint64_t off, rbd_completion_t c); +int rbd_aio_write(rbd_image_t image, uint64_t off, size_t len, const char *buf, + rbd_completion_t c); +int rbd_aio_writev(rbd_image_t image, const struct iovec *iov, int iovcnt, + uint64_t off, rbd_completion_t c); +int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, + rbd_completion_t c); +int rbd_aio_flush(rbd_image_t image, rbd_completion_t c); +int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, size_t len, + rbd_completion_t c); +#if defined(__linux__) +int rbd_aio_compare_and_writev(rbd_image_t image, uint64_t off, + const struct iovec *cmp_iov, int cmp_iovcnt, + const struct iovec *iov, int iovcnt, + 
rbd_completion_t c, uint64_t *mismatch_off); +#endif + +int rbd_flush(rbd_image_t image); + +int rbd_aio_create_completion(void *cb_arg, rbd_callback_t complete_cb, + rbd_completion_t *c); +int rbd_aio_wait_for_complete(rbd_completion_t c); +int rbd_aio_is_complete(rbd_completion_t c); +ssize_t rbd_aio_get_return_value(rbd_completion_t c); +void *rbd_aio_get_arg(rbd_completion_t c); +void rbd_aio_release(rbd_completion_t c); + +/* Phase A1: External-thread integration for SPDK reactor + * When SPDK hosts the threads, each reactor thread runs a Seastar reactor. + * Call these from SPDK reactor loop. Part of librbd_crimson; no direct + * SPDK->Seastar dependency. Build ceph-nvmeof against librbd_crimson RPM. + */ + +/** Configure Seastar for external-thread mode. Call once before spdk_reactors_start(). */ +int rbd_crimson_configure_external_threads(unsigned core_count); + +/** Register a Seastar reactor on the current thread. Call once per SPDK reactor thread. */ +void rbd_crimson_register_reactor(unsigned shard_id); + +/** Advance the Seastar reactor one tick. Call once per reactor loop iteration. Returns 1 if more work, 0 if stopped. */ +int rbd_crimson_run_one_tick(void); + +/** Clean up the Seastar reactor on the current thread. Call when SPDK reactor thread exits. */ +void rbd_crimson_reactor_cleanup(void); + +/** Clean up all Seastar resources. Call from main thread after spdk_reactors_fini. 
*/ +void rbd_crimson_cleanup_all(void); + +#ifdef __cplusplus +} +#endif + +#endif /* CEPH_RBD_CRIMSON_H */ diff --git a/src/seastar b/src/seastar index 7347cf6f4f9..6e70097b40d 160000 --- a/src/seastar +++ b/src/seastar @@ -1 +1 @@ -Subproject commit 7347cf6f4f966929d5dc5b3fd7e34d771c9b3f85 +Subproject commit 6e70097b40d2673fc9ca86b16ebd564e300e4592 diff --git a/src/test/crimson/CMakeLists.txt b/src/test/crimson/CMakeLists.txt index 5c2c2384950..e0336e0448c 100644 --- a/src/test/crimson/CMakeLists.txt +++ b/src/test/crimson/CMakeLists.txt @@ -159,3 +159,11 @@ add_ceph_test(unittest-crimson-objecter target_link_libraries(unittest-crimson-objecter crimson crimson::gtest) + +add_executable(unittest-crimson-rbd + test_rbd.cc) +add_ceph_unittest(unittest-crimson-rbd + --memory 256M --smp 1) +target_link_libraries(unittest-crimson-rbd + librbd_crimson + GTest::Main) diff --git a/src/test/crimson/test_rbd.cc b/src/test/crimson/test_rbd.cc new file mode 100644 index 00000000000..8a37acdafd4 --- /dev/null +++ b/src/test/crimson/test_rbd.cc @@ -0,0 +1,81 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 sts=2 expandtab +// +// Unit tests for librbd_crimson: utils (header_name, data_object_name, +// extent_to_object_extents). No cluster required. 
+// + +#include + +#include "crimson/rbd/utils.h" + +using namespace crimson::rbd; + +TEST(rbd_utils, header_name) +{ + EXPECT_EQ(header_name("abc123"), "rbd_header.abc123"); + EXPECT_EQ(header_name(""), "rbd_header."); +} + +TEST(rbd_utils, data_object_name) +{ + EXPECT_EQ(data_object_name("abc123", 0), "rbd_data.abc123.0000000000000000"); + EXPECT_EQ(data_object_name("abc123", 1), "rbd_data.abc123.0000000000000001"); + EXPECT_EQ(data_object_name("id", 0x12345678), + "rbd_data.id.0000000012345678"); +} + +TEST(rbd_utils, extent_to_object_extents_empty) +{ + auto extents = extent_to_object_extents(0, 0, 22); + EXPECT_TRUE(extents.empty()); +} + +TEST(rbd_utils, extent_to_object_extents_single_object) +{ + // order 22 -> object_size 4MB + auto extents = extent_to_object_extents(0, 4096, 22); + ASSERT_EQ(extents.size(), 1u); + EXPECT_EQ(extents[0].object_no, 0u); + EXPECT_EQ(extents[0].object_off, 0u); + EXPECT_EQ(extents[0].length, 4096u); +} + +TEST(rbd_utils, extent_to_object_extents_crosses_boundary) +{ + // order 12 -> object_size 4KB + auto extents = extent_to_object_extents(2048, 4096, 12); + ASSERT_EQ(extents.size(), 2u); + EXPECT_EQ(extents[0].object_no, 0u); + EXPECT_EQ(extents[0].object_off, 2048u); + EXPECT_EQ(extents[0].length, 2048u); + EXPECT_EQ(extents[1].object_no, 1u); + EXPECT_EQ(extents[1].object_off, 0u); + EXPECT_EQ(extents[1].length, 2048u); +} + +TEST(rbd_utils, extent_to_object_extents_multiple_objects) +{ + // order 12 -> object_size 4KB + auto extents = extent_to_object_extents(0, 12288, 12); + ASSERT_EQ(extents.size(), 3u); + EXPECT_EQ(extents[0].object_no, 0u); + EXPECT_EQ(extents[0].object_off, 0u); + EXPECT_EQ(extents[0].length, 4096u); + EXPECT_EQ(extents[1].object_no, 1u); + EXPECT_EQ(extents[1].object_off, 0u); + EXPECT_EQ(extents[1].length, 4096u); + EXPECT_EQ(extents[2].object_no, 2u); + EXPECT_EQ(extents[2].object_off, 0u); + EXPECT_EQ(extents[2].length, 4096u); +} + +TEST(rbd_utils, extent_to_object_extents_unaligned) +{ + // 
order 12 -> object_size 4KB; offset 100, length 500 + auto extents = extent_to_object_extents(100, 500, 12); + ASSERT_EQ(extents.size(), 1u); + EXPECT_EQ(extents[0].object_no, 0u); + EXPECT_EQ(extents[0].object_off, 100u); + EXPECT_EQ(extents[0].length, 500u); +}