${PROJECT_SOURCE_DIR}/src/osd/HitSet.cc
${PROJECT_SOURCE_DIR}/src/osd/OSDMap.cc
${PROJECT_SOURCE_DIR}/src/osd/PGPeeringEvent.cc
+ ${PROJECT_SOURCE_DIR}/src/common/scrub_types.cc
${PROJECT_SOURCE_DIR}/src/xxHash/xxhash.c
${crimson_common_srcs}
$<TARGET_OBJECTS:common_mountcephfs_objs>
replicated_recovery_backend.cc
scheduler/scheduler.cc
scheduler/mclock_scheduler.cc
+ scrub/scrub_machine.cc
+ scrub/scrub_validator.cc
osdmap_gate.cc
pg_activation_blocker.cc
pg_map.cc
objclass.cc
${PROJECT_SOURCE_DIR}/src/objclass/class_api.cc
${PROJECT_SOURCE_DIR}/src/osd/ClassHandler.cc
+ ${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc
${PROJECT_SOURCE_DIR}/src/osd/osd_op_util.cc
${PROJECT_SOURCE_DIR}/src/osd/OSDCap.cc
${PROJECT_SOURCE_DIR}/src/osd/PeeringState.cc
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/ceph_assert.h"
+
+#include "crimson/osd/scrub/scrub_machine.h"
+
+namespace crimson::osd::scrub {
+
+WaitUpdate::WaitUpdate(my_context ctx) : ScrubState(ctx)
+{
+ auto &cs = context<ChunkState>();
+ cs.range_reserved = true;
+  ceph_assert(cs.range);
+ get_scrub_context().reserve_range(cs.range->start, cs.range->end);
+}
+
+ScanRange::ScanRange(my_context ctx) : ScrubState(ctx)
+{
+ ceph_assert(context<ChunkState>().range);
+ const auto &cs = context<ChunkState>();
+ const auto &range = cs.range.value();
+  get_scrub_context().foreach_id_to_scrub(
+    [this, &range, &cs](const auto &id) {
+ get_scrub_context().scan_range(
+ id, cs.version,
+ context<Scrubbing>().deep,
+ range.start, range.end);
+ waiting_on++;
+ });
+}
+
+sc::result ScanRange::react(const ScrubContext::scan_range_complete_t &event)
+{
+ auto [_, inserted] = maps.insert(event.value.to_pair());
+ ceph_assert(inserted);
+ ceph_assert(waiting_on > 0);
+ --waiting_on;
+
+ if (waiting_on > 0) {
+ return discard_event();
+ } else {
+ ceph_assert(context<ChunkState>().range);
+ {
+ auto results = validate_chunk(
+ get_scrub_context().get_dpp(),
+ context<Scrubbing>().policy,
+ maps);
+ context<Scrubbing>().stats.add(results.stats);
+ get_scrub_context().emit_chunk_result(
+ *(context<ChunkState>().range),
+ std::move(results));
+ }
+ if (context<ChunkState>().range->end.is_max()) {
+ get_scrub_context().emit_scrub_result(
+ context<Scrubbing>().deep,
+ context<Scrubbing>().stats);
+ return transit<PrimaryActive>();
+ } else {
+ context<Scrubbing>().advance_current(
+ context<ChunkState>().range->end);
+ return transit<ChunkState>();
+ }
+ }
+}
+
+ReplicaScanChunk::ReplicaScanChunk(my_context ctx) : ScrubState(ctx)
+{
+ auto &to_scan = context<ReplicaChunkState>().to_scan;
+ get_scrub_context().generate_and_submit_chunk_result(
+ to_scan.start,
+ to_scan.end,
+ to_scan.deep);
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+#include <ranges>
+
+#include <boost/statechart/custom_reaction.hpp>
+#include <boost/statechart/deferral.hpp>
+#include <boost/statechart/event.hpp>
+#include <boost/statechart/event_base.hpp>
+#include <boost/statechart/in_state_reaction.hpp>
+#include <boost/statechart/simple_state.hpp>
+#include <boost/statechart/state.hpp>
+#include <boost/statechart/state_machine.hpp>
+#include <boost/statechart/transition.hpp>
+
+#include "common/fmt_common.h"
+#include "common/hobject.h"
+#include "common/hobject_fmt.h"
+#include "crimson/common/log.h"
+#include "osd/osd_types_fmt.h"
+#include "scrub_validator.h"
+
+namespace crimson::osd::scrub {
+
+/* Development Notes
+ *
+ * Notes:
+ * - We're leaving out all of the throttle waits. We actually want to handle
+ * that using crimson's operation throttler machinery.
+ *
+ * TODOs:
+ * - Leaving SnapMapper validation to later work
+ * - Note, each replica should validate and repair locally as the SnapMapper
+ * is meant to be a local index of the local object contents
+ * - Leaving preemption for later
+ * - Leaving scheduling for later, for now the only way to trigger a scrub
+ * is via the ceph tell <pgid> [deep_]scrub command
+ */
+
+namespace sc = boost::statechart;
+
+template <typename T>
+struct simple_event_t : sc::event<T> {
+ template <typename FormatContext>
+ auto fmt_print_ctx(FormatContext & ctx) const {
+ return fmt::format_to(ctx.out(), "{}", T::event_name);
+ }
+};
+
+template <typename T, has_formatter V>
+struct value_event_t : sc::event<T> {
+ const V value;
+
+ template <typename... Args>
+ value_event_t(Args&&... args) : value(std::forward<Args>(args)...) {}
+
+ value_event_t(const value_event_t &) = default;
+ value_event_t(value_event_t &&) = default;
+ value_event_t &operator=(const value_event_t&) = default;
+ value_event_t &operator=(value_event_t&&) = default;
+
+ template <typename FormatContext>
+ auto fmt_print_ctx(FormatContext & ctx) const {
+ return fmt::format_to(ctx.out(), "{}", T::event_name);
+ }
+};
+
+
+#define SIMPLE_EVENT(T) struct T : simple_event_t<T> { \
+ static constexpr const char * event_name = #T; \
+ };
+
+#define VALUE_EVENT(T, V) struct T : value_event_t<T, V> { \
+ static constexpr const char * event_name = #T; \
+ \
+ template <typename... Args> \
+ T(Args&&... args) : value_event_t( \
+ std::forward<Args>(args)...) {} \
+ };
+
+/**
+ * ScrubContext
+ *
+ * Interface to external PG/OSD/IO machinery.
+ *
+ * Methods which may take time return immediately and define an event which
+ * will be asynchronously delivered to the state machine with the result. This
+ * is a bit clumsy to use, but should render this component highly testable.
+ *
+ * Events sent as a completion to a ScrubContext interface method are defined
+ * within ScrubContext. Other events are defined within ScrubMachine.
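+ *
+ * Example (sketch only, names are illustrative): an implementation's
+ * request_range() would kick off whatever asynchronous work is needed and,
+ * once the range is known, deliver the completion event back to the
+ * machine, e.g.:
+ *
+ *   void MyScrubContext::request_range(const hobject_t &start) {
+ *     // ... asynchronously compute [range_start, range_end) ...
+ *     machine.process_event(
+ *       ScrubContext::request_range_complete_t{range_start, range_end});
+ *   }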
+ */
+struct ScrubContext {
+ /// return ids to scrub
+ virtual const std::set<pg_shard_t> &get_ids_to_scrub() const = 0;
+
+ /// iterates over each pg_shard_t to scrub
+ template <typename F>
+ void foreach_id_to_scrub(F &&f) {
+ for (const auto &id : get_ids_to_scrub()) {
+ std::invoke(f, id);
+ }
+ }
+
+ /// return struct defining chunk validation rules
+ virtual chunk_validation_policy_t get_policy() const = 0;
+
+ /// notifies implementation of scrub start
+ virtual void notify_scrub_start(bool deep) = 0;
+
+ /// notifies implementation of scrub end
+ virtual void notify_scrub_end(bool deep) = 0;
+
+ /// requests range to scrub starting at start
+ struct request_range_result_t {
+ hobject_t start;
+ hobject_t end;
+
+ request_range_result_t(
+ const hobject_t &start,
+ const hobject_t &end) : start(start), end(end) {}
+
+ auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+ return fmt::format_to(ctx.out(), "start: {}, end: {}", start, end);
+ }
+ };
+ VALUE_EVENT(request_range_complete_t, request_range_result_t);
+ virtual void request_range(
+ const hobject_t &start) = 0;
+
+ /// reserves range [start, end)
+ VALUE_EVENT(reserve_range_complete_t, eversion_t);
+ virtual void reserve_range(
+ const hobject_t &start,
+ const hobject_t &end) = 0;
+
+  /// waits until implementation has committed up to version; returns true
+  /// if version has already been committed
+ SIMPLE_EVENT(await_update_complete_t);
+ virtual bool await_update(
+ const eversion_t &version) = 0;
+
+ /// cancel in progress or currently reserved range
+ virtual void release_range() = 0;
+
+ /// scans [begin, end) on target as of version
+ struct scan_range_value_t {
+ pg_shard_t from;
+ ScrubMap map;
+
+ template <typename Map>
+ scan_range_value_t(
+ pg_shard_t from,
+ Map &&map) : from(from), map(std::forward<Map>(map)) {}
+
+ auto to_pair() const { return std::make_pair(from, map); }
+ auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+ return fmt::format_to(ctx.out(), "from: {}", from);
+ }
+ };
+ VALUE_EVENT(scan_range_complete_t, scan_range_value_t);
+ virtual void scan_range(
+ pg_shard_t target,
+ eversion_t version,
+ bool deep,
+ const hobject_t &start,
+ const hobject_t &end) = 0;
+
+  /// instructs implementation to scan [begin, end) and emit result to primary
+ SIMPLE_EVENT(generate_and_submit_chunk_result_complete_t);
+ virtual void generate_and_submit_chunk_result(
+ const hobject_t &begin,
+ const hobject_t &end,
+ bool deep) = 0;
+
+ /// notifies implementation of chunk scrub results
+ virtual void emit_chunk_result(
+ const request_range_result_t &range,
+ chunk_result_t &&result) = 0;
+
+ /// notifies implementation of full scrub results
+ virtual void emit_scrub_result(
+ bool deep,
+ object_stat_sum_t scrub_stats) = 0;
+
+ /// get dpp instance for logging
+ virtual DoutPrefixProvider &get_dpp() = 0;
+};
+
+struct Crash;
+struct Inactive;
+
+namespace events {
+/// reset ScrubMachine
+SIMPLE_EVENT(reset_t);
+
+/// start (deep) scrub
+struct start_scrub_event_t {
+ bool deep = false;
+
+ start_scrub_event_t(bool deep) : deep(deep) {}
+
+ auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+ return fmt::format_to(ctx.out(), "deep: {}", deep);
+ }
+};
+VALUE_EVENT(start_scrub_t, start_scrub_event_t);
+
+/// notifies ScrubMachine about a write on oid resulting in delta_stats
+struct op_stat_event_t {
+ hobject_t oid;
+ object_stat_sum_t delta_stats;
+
+ op_stat_event_t(
+ hobject_t oid,
+ object_stat_sum_t delta_stats) : oid(oid), delta_stats(delta_stats) {}
+
+ auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+ return fmt::format_to(ctx.out(), "oid: {}", oid);
+ }
+};
+VALUE_EVENT(op_stats_t, op_stat_event_t);
+
+/// Prepares statemachine for primary events
+SIMPLE_EVENT(primary_activate_t);
+
+/// Prepares statemachine for replica events
+SIMPLE_EVENT(replica_activate_t);
+
+/// Instructs replica to (deep) scrub [start, end) as of version version
+struct replica_scan_event_t {
+ hobject_t start;
+ hobject_t end;
+ eversion_t version;
+ bool deep = false;
+
+ replica_scan_event_t() = default;
+
+ replica_scan_event_t(
+ hobject_t start,
+ hobject_t end,
+ eversion_t version,
+ bool deep) : start(start), end(end), version(version), deep(deep) {}
+
+ auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+ return fmt::format_to(
+ ctx.out(), "start: {}, end: {}, version: {}, deep: {}",
+ start, end, version, deep);
+ }
+};
+VALUE_EVENT(replica_scan_t, replica_scan_event_t);
+
+}
+
+
+/**
+ * ScrubMachine
+ *
+ * Manages orchestration of rados's distributed scrub process.
+ *
+ * There are two general ways in which ScrubMachine may need to release
+ * resources:
+ * - interval_change_t -- represents case where PG as a whole undergoes
+ * a distributed mapping change. Distributed resources are released
+ * implicitly as remote PG instances receive the new map. Local
+ * resources are still released by ScrubMachine via ScrubContext methods
+ * generally via state destructors
+ * - otherwise, ScrubMachine is responsible for notifying remote PG
+ * instances via the appropriate ScrubContext methods again generally
+ * from state destructors.
+ *
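+ * A minimal sketch of driving the machine from its owner (assuming a
+ * "machine" member constructed with a ScrubContext implementation):
+ *
+ *   machine.initiate();
+ *   machine.process_event(events::primary_activate_t{});
+ *   machine.process_event(events::start_scrub_t{true}); // deep scrub
+ *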
+ * TODO: interval_change_t will be added with remote reservations.
+ */
+class ScrubMachine
+ : public sc::state_machine<ScrubMachine, Inactive> {
+public:
+ static constexpr std::string_view full_name = "ScrubMachine";
+
+ ScrubContext &context;
+ ScrubMachine(ScrubContext &context) : context(context) {}
+};
+
+/**
+ * ScrubState
+ *
+ * Template defining machinery/state common to all scrub state machine
+ * states.
+ */
+template <typename S, typename P, typename... T>
+struct ScrubState : sc::state<S, P, T...> {
+ using sc_base = sc::state<S, P, T...>;
+ DoutPrefixProvider &dpp;
+
+ /* machinery for populating a full_name member for each ScrubState with
+ * ScrubMachine/.../ParentState/ChildState full_name */
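+  /* e.g. GetRange::full_name evaluates to
+   * "ScrubMachine/PrimaryActive/Scrubbing/ChunkState/GetRange" */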
+ template <std::string_view const &PN, typename PI,
+ std::string_view const &CN, typename CI>
+ struct concat;
+
+ template <std::string_view const &PN, std::size_t... PI,
+ std::string_view const &CN, std::size_t... CI>
+ struct concat<PN, std::index_sequence<PI...>, CN, std::index_sequence<CI...>> {
+ static constexpr size_t value_size = PN.size() + CN.size() + 1;
+ static constexpr const char value[value_size]{PN[PI]..., '/', CN[CI]...};
+ };
+
+ template <std::string_view const &PN, std::string_view const &CN>
+ struct join {
+ using conc = concat<
+ PN, std::make_index_sequence<PN.size()>,
+ CN, std::make_index_sequence<CN.size()>>;
+ static constexpr std::string_view value{
+ conc::value,
+ conc::value_size
+ };
+ };
+
+ /// Populated with ScrubMachine/.../Parent/Child for each state Child
+ static constexpr std::string_view full_name =
+ join<P::full_name, S::state_name>::value;
+
+ template <typename C>
+ explicit ScrubState(C ctx) : sc_base(ctx), dpp(get_scrub_context().get_dpp()) {
+ LOG_PREFIX(ScrubState::ScrubState);
+ SUBDEBUGDPP(osd, "entering state {}", dpp, full_name);
+ }
+
+ ~ScrubState() {
+ LOG_PREFIX(ScrubState::~ScrubState);
+ SUBDEBUGDPP(osd, "exiting state {}", dpp, full_name);
+ }
+
+ auto &get_scrub_context() {
+ return sc_base::template context<ScrubMachine>().context;
+ }
+};
+
+struct Crash : ScrubState<Crash, ScrubMachine> {
+ static constexpr std::string_view state_name = "Crash";
+ explicit Crash(my_context ctx) : ScrubState(ctx) {
+ ceph_abort("Crash state impossible");
+ }
+
+};
+
+struct PrimaryActive;
+struct ReplicaActive;
+struct Inactive : ScrubState<Inactive, ScrubMachine> {
+ static constexpr std::string_view state_name = "Inactive";
+ explicit Inactive(my_context ctx) : ScrubState(ctx) {}
+
+ using reactions = boost::mpl::list<
+ sc::transition<events::primary_activate_t, PrimaryActive>,
+ sc::transition<events::replica_activate_t, ReplicaActive>,
+ sc::custom_reaction<events::reset_t>,
+ sc::custom_reaction<events::start_scrub_t>,
+ sc::custom_reaction<events::op_stats_t>,
+ sc::transition< boost::statechart::event_base, Crash >
+ >;
+
+ sc::result react(const events::reset_t &) {
+ return discard_event();
+ }
+ sc::result react(const events::start_scrub_t &) {
+ return discard_event();
+ }
+ sc::result react(const events::op_stats_t &) {
+ return discard_event();
+ }
+};
+
+struct AwaitScrub;
+struct PrimaryActive : ScrubState<PrimaryActive, ScrubMachine, AwaitScrub> {
+ static constexpr std::string_view state_name = "PrimaryActive";
+ explicit PrimaryActive(my_context ctx) : ScrubState(ctx) {}
+
+ bool local_reservation_held = false;
+ std::set<pg_shard_t> remote_reservations_held;
+
+ using reactions = boost::mpl::list<
+ sc::transition<events::reset_t, Inactive>,
+ sc::custom_reaction<events::start_scrub_t>,
+ sc::custom_reaction<events::op_stats_t>,
+ sc::transition< boost::statechart::event_base, Crash >
+ >;
+
+ sc::result react(const events::start_scrub_t &event) {
+ return discard_event();
+ }
+
+ sc::result react(const events::op_stats_t &) {
+ return discard_event();
+ }
+};
+
+namespace internal_events {
+VALUE_EVENT(set_deep_t, bool);
+}
+
+struct Scrubbing;
+struct AwaitScrub : ScrubState<AwaitScrub, PrimaryActive> {
+ static constexpr std::string_view state_name = "AwaitScrub";
+ explicit AwaitScrub(my_context ctx) : ScrubState(ctx) {}
+
+ using reactions = boost::mpl::list<
+ sc::custom_reaction<events::start_scrub_t>
+ >;
+
+ sc::result react(const events::start_scrub_t &event) {
+ post_event(internal_events::set_deep_t{event.value.deep});
+ return transit<Scrubbing>();
+ }
+};
+
+struct ChunkState;
+struct Scrubbing : ScrubState<Scrubbing, PrimaryActive, ChunkState> {
+ static constexpr std::string_view state_name = "Scrubbing";
+ explicit Scrubbing(my_context ctx)
+ : ScrubState(ctx), policy(get_scrub_context().get_policy()) {}
+
+
+ using reactions = boost::mpl::list<
+ sc::custom_reaction<internal_events::set_deep_t>,
+ sc::custom_reaction<events::op_stats_t>
+ >;
+
+ chunk_validation_policy_t policy;
+
+ /// hobjects < current have been scrubbed
+ hobject_t current;
+
+ /// true for deep scrub
+ bool deep = false;
+
+ /// stats for objects < current, maintained via events::op_stats_t
+ object_stat_sum_t stats;
+
+ void advance_current(const hobject_t &next) {
+ current = next;
+ }
+
+ sc::result react(const internal_events::set_deep_t &event) {
+ deep = event.value;
+ get_scrub_context().notify_scrub_start(deep);
+ return discard_event();
+ }
+
+ void exit() {
+ get_scrub_context().notify_scrub_end(deep);
+ }
+
+ sc::result react(const events::op_stats_t &event) {
+ if (event.value.oid < current) {
+ stats.add(event.value.delta_stats);
+ }
+ return discard_event();
+ }
+};
+
+struct GetRange;
+struct ChunkState : ScrubState<ChunkState, Scrubbing, GetRange> {
+ static constexpr std::string_view state_name = "ChunkState";
+ explicit ChunkState(my_context ctx) : ScrubState(ctx) {}
+
+  /// Current chunk includes objects in [range->start, range->end)
+ boost::optional<ScrubContext::request_range_result_t> range;
+
+ /// true once we have requested that the range be reserved
+ bool range_reserved = false;
+
+ /// version of last update for the reserved chunk
+ eversion_t version;
+
+ void exit() {
+ if (range_reserved) {
+ get_scrub_context().release_range();
+ }
+ }
+};
+
+struct WaitUpdate;
+struct GetRange : ScrubState<GetRange, ChunkState> {
+ static constexpr std::string_view state_name = "GetRange";
+ explicit GetRange(my_context ctx) : ScrubState(ctx) {
+ get_scrub_context().request_range(context<Scrubbing>().current);
+ }
+
+ using reactions = boost::mpl::list<
+ sc::custom_reaction<ScrubContext::request_range_complete_t>
+ >;
+
+ sc::result react(const ScrubContext::request_range_complete_t &event) {
+ context<ChunkState>().range = event.value;
+ return transit<WaitUpdate>();
+ }
+};
+
+struct ScanRange;
+struct WaitUpdate : ScrubState<WaitUpdate, ChunkState> {
+ static constexpr std::string_view state_name = "WaitUpdate";
+ explicit WaitUpdate(my_context ctx);
+
+ using reactions = boost::mpl::list<
+ sc::custom_reaction<ScrubContext::reserve_range_complete_t>
+ >;
+
+ sc::result react(const ScrubContext::reserve_range_complete_t &e) {
+ context<ChunkState>().version = e.value;
+ return transit<ScanRange>();
+ }
+};
+
+struct ScanRange : ScrubState<ScanRange, ChunkState> {
+ static constexpr std::string_view state_name = "ScanRange";
+ explicit ScanRange(my_context ctx);
+
+ scrub_map_set_t maps;
+ unsigned waiting_on = 0;
+
+ using reactions = boost::mpl::list<
+ sc::custom_reaction<ScrubContext::scan_range_complete_t>
+ >;
+
+ sc::result react(const ScrubContext::scan_range_complete_t &);
+};
+
+struct ReplicaIdle;
+struct ReplicaActive :
+ ScrubState<ReplicaActive, ScrubMachine, ReplicaIdle> {
+ static constexpr std::string_view state_name = "ReplicaActive";
+ explicit ReplicaActive(my_context ctx) : ScrubState(ctx) {}
+
+ using reactions = boost::mpl::list<
+ sc::transition<events::reset_t, Inactive>,
+ sc::custom_reaction<events::start_scrub_t>,
+ sc::custom_reaction<events::op_stats_t>,
+ sc::transition< boost::statechart::event_base, Crash >
+ >;
+
+ sc::result react(const events::start_scrub_t &) {
+ return discard_event();
+ }
+
+ sc::result react(const events::op_stats_t &) {
+ return discard_event();
+ }
+};
+
+struct ReplicaChunkState;
+struct ReplicaIdle : ScrubState<ReplicaIdle, ReplicaActive> {
+ static constexpr std::string_view state_name = "ReplicaIdle";
+ explicit ReplicaIdle(my_context ctx) : ScrubState(ctx) {}
+
+ using reactions = boost::mpl::list<
+ sc::custom_reaction<events::replica_scan_t>
+ >;
+
+ sc::result react(const events::replica_scan_t &event) {
+ LOG_PREFIX(ScrubState::ReplicaIdle::react(events::replica_scan_t));
+ SUBDEBUGDPP(osd, "event.value: {}", get_scrub_context().get_dpp(), event.value);
+ post_event(event);
+ return transit<ReplicaChunkState>();
+ }
+};
+
+struct ReplicaWaitUpdate;
+struct ReplicaChunkState : ScrubState<ReplicaChunkState, ReplicaActive, ReplicaWaitUpdate> {
+ static constexpr std::string_view state_name = "ReplicaChunkState";
+ explicit ReplicaChunkState(my_context ctx) : ScrubState(ctx) {}
+
+ using reactions = boost::mpl::list<
+ sc::custom_reaction<events::replica_scan_t>
+ >;
+
+ events::replica_scan_event_t to_scan;
+
+ sc::result react(const events::replica_scan_t &event) {
+    LOG_PREFIX(ScrubState::ReplicaChunkState::react(events::replica_scan_t));
+ SUBDEBUGDPP(osd, "event.value: {}", get_scrub_context().get_dpp(), event.value);
+ to_scan = event.value;
+ if (get_scrub_context().await_update(event.value.version)) {
+ post_event(ScrubContext::await_update_complete_t{});
+ }
+ return discard_event();
+ }
+};
+
+struct ReplicaScanChunk;
+struct ReplicaWaitUpdate : ScrubState<ReplicaWaitUpdate, ReplicaChunkState> {
+ static constexpr std::string_view state_name = "ReplicaWaitUpdate";
+ explicit ReplicaWaitUpdate(my_context ctx) : ScrubState(ctx) {}
+
+ using reactions = boost::mpl::list<
+ sc::transition<ScrubContext::await_update_complete_t, ReplicaScanChunk>
+ >;
+};
+
+struct ReplicaScanChunk : ScrubState<ReplicaScanChunk, ReplicaChunkState> {
+ static constexpr std::string_view state_name = "ReplicaScanChunk";
+ explicit ReplicaScanChunk(my_context ctx);
+
+ using reactions = boost::mpl::list<
+ sc::transition<ScrubContext::generate_and_submit_chunk_result_complete_t,
+ ReplicaIdle>
+ >;
+};
+
+#undef SIMPLE_EVENT
+#undef VALUE_EVENT
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <ranges>
+
+#include "osd/osd_types_fmt.h"
+
+#include "crimson/common/log.h"
+#include "crimson/osd/scrub/scrub_validator.h"
+#include "osd/ECUtil.h"
+
+SET_SUBSYS(osd);
+
+namespace crimson::osd::scrub {
+
+using object_set_t = std::set<hobject_t>;
+object_set_t get_object_set(const scrub_map_set_t &in)
+{
+ object_set_t ret;
+ for (const auto& [from, map] : in) {
+ std::transform(map.objects.begin(), map.objects.end(),
+ std::inserter(ret, ret.end()),
+ [](const auto& i) { return i.first; });
+ }
+ return ret;
+}
+
+struct shard_evaluation_t {
+ pg_shard_t source;
+ shard_info_wrapper shard_info;
+
+ std::optional<object_info_t> object_info;
+ std::optional<SnapSet> snapset;
+ std::optional<ECUtil::HashInfo> hinfo;
+
+ size_t omap_keys{0};
+ size_t omap_bytes{0};
+
+ bool has_errors() const {
+ return shard_info.has_errors();
+ }
+
+ bool is_primary() const {
+ return shard_info.primary;
+ }
+
+ std::weak_ordering operator<=>(const shard_evaluation_t &rhs) const {
+ return std::make_tuple(!has_errors(), is_primary()) <=>
+ std::make_tuple(!rhs.has_errors(), rhs.is_primary());
+ }
+};
+shard_evaluation_t evaluate_object_shard(
+ const chunk_validation_policy_t &policy,
+ const hobject_t &oid,
+ pg_shard_t from,
+ const ScrubMap::object *maybe_obj)
+{
+ shard_evaluation_t ret;
+ ret.source = from;
+ if (from == policy.primary) {
+ ret.shard_info.primary = true;
+ }
+  if (!maybe_obj || maybe_obj->negative) {
+    // negative results are impossible since chunky scrub was introduced,
+    // so reaching this branch means the shard is missing the object
+    ceph_assert(!maybe_obj || !maybe_obj->negative);
+ ret.shard_info.set_missing();
+ return ret;
+ }
+
+ auto &obj = *maybe_obj;
+ /* We are ignoring ScrubMap::object::large_omap_object*, object_omap_* is all the
+ * info we need */
+ ret.omap_keys = obj.object_omap_keys;
+ ret.omap_bytes = obj.object_omap_bytes;
+
+ ret.shard_info.set_object(obj);
+
+ if (obj.ec_hash_mismatch) {
+ ret.shard_info.set_ec_hash_mismatch();
+ }
+
+ if (obj.ec_size_mismatch) {
+ ret.shard_info.set_ec_size_mismatch();
+ }
+
+ if (obj.read_error) {
+ ret.shard_info.set_read_error();
+ }
+
+ if (obj.stat_error) {
+ ret.shard_info.set_stat_error();
+ }
+
+ {
+ auto xiter = obj.attrs.find(OI_ATTR);
+ if (xiter == obj.attrs.end()) {
+ ret.shard_info.set_info_missing();
+ } else {
+ bufferlist bl;
+ bl.push_back(xiter->second);
+ ret.object_info = object_info_t{};
+ try {
+ auto bliter = bl.cbegin();
+ ::decode(*(ret.object_info), bliter);
+ } catch (...) {
+ ret.shard_info.set_info_corrupted();
+ ret.object_info = std::nullopt;
+ }
+ }
+ }
+
+ ret.shard_info.size = obj.size;
+ if (ret.object_info &&
+ obj.size != policy.logical_to_ondisk_size(ret.object_info->size)) {
+ ret.shard_info.set_size_mismatch_info();
+ }
+
+ if (oid.is_head()) {
+ auto xiter = obj.attrs.find(SS_ATTR);
+ if (xiter == obj.attrs.end()) {
+ ret.shard_info.set_snapset_missing();
+ } else {
+ bufferlist bl;
+ bl.push_back(xiter->second);
+ ret.snapset = SnapSet{};
+ try {
+ auto bliter = bl.cbegin();
+ ::decode(*(ret.snapset), bliter);
+ } catch (...) {
+ ret.shard_info.set_snapset_corrupted();
+ ret.snapset = std::nullopt;
+ }
+ }
+ }
+
+ if (policy.is_ec()) {
+ auto xiter = obj.attrs.find(ECUtil::get_hinfo_key());
+ if (xiter == obj.attrs.end()) {
+ ret.shard_info.set_hinfo_missing();
+ } else {
+ bufferlist bl;
+ bl.push_back(xiter->second);
+ ret.hinfo = ECUtil::HashInfo{};
+ try {
+ auto bliter = bl.cbegin();
+ decode(*(ret.hinfo), bliter);
+ } catch (...) {
+ ret.shard_info.set_hinfo_corrupted();
+ ret.hinfo = std::nullopt;
+ }
+ }
+ }
+
+ if (ret.object_info) {
+ if (ret.shard_info.data_digest_present &&
+ ret.object_info->is_data_digest() &&
+ (ret.object_info->data_digest != ret.shard_info.data_digest)) {
+ ret.shard_info.set_data_digest_mismatch_info();
+ }
+ if (ret.shard_info.omap_digest_present &&
+ ret.object_info->is_omap_digest() &&
+ (ret.object_info->omap_digest != ret.shard_info.omap_digest)) {
+ ret.shard_info.set_omap_digest_mismatch_info();
+ }
+ }
+
+ return ret;
+}
+
+librados::obj_err_t compare_candidate_to_authoritative(
+ const chunk_validation_policy_t &policy,
+ const hobject_t &oid,
+ const shard_evaluation_t &auth,
+ const shard_evaluation_t &cand)
+{
+ using namespace librados;
+ obj_err_t ret;
+
+ if (cand.shard_info.has_shard_missing()) {
+ return ret;
+ }
+
+ const auto &auth_si = auth.shard_info;
+ const auto &cand_si = cand.shard_info;
+
+ if (auth_si.data_digest != cand_si.data_digest) {
+ ret.errors |= obj_err_t::DATA_DIGEST_MISMATCH;
+ }
+
+ if (auth_si.omap_digest != cand_si.omap_digest) {
+ ret.errors |= obj_err_t::OMAP_DIGEST_MISMATCH;
+ }
+
+ {
+ auto aiter = auth_si.attrs.find(OI_ATTR);
+ ceph_assert(aiter != auth_si.attrs.end());
+
+ auto citer = cand_si.attrs.find(OI_ATTR);
+ if (citer == cand_si.attrs.end() ||
+ !aiter->second.contents_equal(citer->second)) {
+ ret.errors |= obj_err_t::OBJECT_INFO_INCONSISTENCY;
+ }
+ }
+
+ if (oid.is_head()) {
+ auto aiter = auth_si.attrs.find(SS_ATTR);
+ ceph_assert(aiter != auth_si.attrs.end());
+
+ auto citer = cand_si.attrs.find(SS_ATTR);
+ if (citer == cand_si.attrs.end() ||
+ !aiter->second.contents_equal(citer->second)) {
+ ret.errors |= obj_err_t::SNAPSET_INCONSISTENCY;
+ }
+ }
+
+ if (policy.is_ec()) {
+ auto aiter = auth_si.attrs.find(ECUtil::get_hinfo_key());
+ ceph_assert(aiter != auth_si.attrs.end());
+
+ auto citer = cand_si.attrs.find(ECUtil::get_hinfo_key());
+ if (citer == cand_si.attrs.end() ||
+ !aiter->second.contents_equal(citer->second)) {
+ ret.errors |= obj_err_t::HINFO_INCONSISTENCY;
+ }
+ }
+
+ if (auth_si.size != cand_si.size) {
+ ret.errors |= obj_err_t::SIZE_MISMATCH;
+ }
+
+ auto is_sys_attr = [&policy](const auto &str) {
+ return str == OI_ATTR || str == SS_ATTR ||
+ (policy.is_ec() && str == ECUtil::get_hinfo_key());
+ };
+ for (auto aiter = auth_si.attrs.begin(); aiter != auth_si.attrs.end(); ++aiter) {
+ if (is_sys_attr(aiter->first)) continue;
+
+ auto citer = cand_si.attrs.find(aiter->first);
+ if (citer == cand_si.attrs.end()) {
+ ret.errors |= obj_err_t::ATTR_NAME_MISMATCH;
+ } else if (!aiter->second.contents_equal(citer->second)) {
+ ret.errors |= obj_err_t::ATTR_VALUE_MISMATCH;
+ }
+ }
+ if (std::any_of(
+ cand_si.attrs.begin(), cand_si.attrs.end(),
+ [&is_sys_attr, &auth_si](auto &p) {
+ return !is_sys_attr(p.first) &&
+ auth_si.attrs.find(p.first) == auth_si.attrs.end();
+ })) {
+ ret.errors |= obj_err_t::ATTR_NAME_MISMATCH;
+ }
+
+ return ret;
+}
+
+struct object_evaluation_t {
+ std::optional<inconsistent_obj_wrapper> inconsistency;
+ std::optional<object_info_t> object_info;
+ std::optional<SnapSet> snapset;
+
+ size_t omap_keys{0};
+ size_t omap_bytes{0};
+};
+object_evaluation_t evaluate_object(
+ const chunk_validation_policy_t &policy,
+ const hobject_t &hoid,
+ const scrub_map_set_t &maps)
+{
+ ceph_assert(maps.size() > 0);
+ using evaluation_vec_t = std::vector<shard_evaluation_t>;
+ evaluation_vec_t shards;
+ std::transform(
+ maps.begin(), maps.end(),
+ std::inserter(shards, shards.end()),
+ [&hoid, &policy](const auto &item) -> evaluation_vec_t::value_type {
+ const auto &[shard, scrub_map] = item;
+ auto miter = scrub_map.objects.find(hoid);
+ auto maybe_shard = miter == scrub_map.objects.end() ?
+ nullptr : &(miter->second);
+ return evaluate_object_shard(policy, hoid, shard, maybe_shard);
+ });
+
+ std::sort(shards.begin(), shards.end());
+
+ auto &auth_eval = shards.back();
+
+ object_evaluation_t ret;
+ inconsistent_obj_wrapper iow{hoid};
+ if (!auth_eval.has_errors()) {
+ ret.object_info = auth_eval.object_info;
+ ret.omap_keys = auth_eval.omap_keys;
+ ret.omap_bytes = auth_eval.omap_bytes;
+ ret.snapset = auth_eval.snapset;
+ if (auth_eval.object_info->size > policy.max_object_size) {
+ iow.set_size_too_large();
+ }
+ auth_eval.shard_info.selected_oi = true;
+ std::for_each(
+ shards.begin(), shards.end() - 1,
+ [&policy, &hoid, &auth_eval, &iow](auto &cand_eval) {
+ auto err = compare_candidate_to_authoritative(
+ policy, hoid, auth_eval, cand_eval);
+ iow.merge(err);
+ });
+ }
+
+ if (iow.errors ||
+ std::any_of(shards.begin(), shards.end(),
+ [](auto &cand) { return cand.has_errors(); })) {
+ for (auto &eval : shards) {
+ iow.shards.emplace(
+ librados::osd_shard_t{eval.source.osd, eval.source.shard},
+ eval.shard_info);
+ iow.union_shards.errors |= eval.shard_info.errors;
+ }
+ if (auth_eval.object_info) {
+ iow.version = auth_eval.object_info->version.version;
+ }
+ ret.inconsistency = iow;
+ }
+ return ret;
+}
+
+using clone_meta_list_t = std::list<std::pair<hobject_t, object_info_t>>;
+std::optional<inconsistent_snapset_wrapper> evaluate_snapset(
+ DoutPrefixProvider &dpp,
+ const hobject_t &hoid,
+ const std::optional<SnapSet> &maybe_snapset,
+ const clone_meta_list_t &clones)
+{
+ LOG_PREFIX(evaluate_snapset);
+ /* inconsistent_snapset_t has several error codes that seem to pertain to
+ * specific objects rather than to the snapset specifically. I'm choosing
+ * to ignore those for now */
+ inconsistent_snapset_wrapper ret{hoid};
+ if (!maybe_snapset) {
+ ret.set_headless();
+ return ret;
+ }
+ const auto &snapset = *maybe_snapset;
+
+ auto clone_iter = clones.begin();
+ for (auto ss_clone_id : snapset.clones) {
+ for (; clone_iter != clones.end() &&
+ clone_iter->first.snap < ss_clone_id;
+ ++clone_iter) {
+ ret.set_clone(clone_iter->first.snap);
+ }
+
+ if (clone_iter != clones.end() &&
+ clone_iter->first.snap == ss_clone_id) {
+ auto ss_clone_size_iter = snapset.clone_size.find(ss_clone_id);
+ if (ss_clone_size_iter == snapset.clone_size.end() ||
+ ss_clone_size_iter->second != clone_iter->second.size) {
+ ret.set_size_mismatch();
+ }
+ ++clone_iter;
+ } else {
+ ret.set_clone_missing(ss_clone_id);
+ }
+ }
+
+ for (; clone_iter != clones.end(); ++clone_iter) {
+ ret.set_clone(clone_iter->first.snap);
+ }
+
+ if (ret.errors) {
+ DEBUGDPP(
+ "snapset {}, clones {}",
+ dpp, snapset, clones);
+ return ret;
+ } else {
+ return std::nullopt;
+ }
+}
+
+void add_object_to_stats(
+ const chunk_validation_policy_t &policy,
+ const object_evaluation_t &eval,
+ object_stat_sum_t *out)
+{
+ auto &ss = eval.snapset;
+ if (!eval.object_info) {
+ return;
+ }
+ auto &oi = *eval.object_info;
+ ceph_assert(out);
+ out->num_objects++;
+ if (ss) {
+ out->num_bytes += oi.size;
+ for (auto clone : ss->clones) {
+ out->num_bytes += ss->get_clone_bytes(clone);
+ out->num_object_clones++;
+ }
+ if (oi.is_whiteout()) {
+ out->num_whiteouts++;
+ }
+ }
+ if (oi.is_dirty()) {
+ out->num_objects_dirty++;
+ }
+ if (oi.is_cache_pinned()) {
+ out->num_objects_pinned++;
+ }
+ if (oi.has_manifest()) {
+ out->num_objects_manifest++;
+ }
+
+ if (eval.omap_keys > 0) {
+ out->num_objects_omap++;
+ }
+ out->num_omap_keys += eval.omap_keys;
+ out->num_omap_bytes += eval.omap_bytes;
+
+ if (oi.soid.nspace == policy.hitset_namespace) {
+ out->num_objects_hit_set_archive++;
+ out->num_bytes_hit_set_archive += oi.size;
+ }
+
+ if (eval.omap_keys > policy.omap_key_limit ||
+ eval.omap_bytes > policy.omap_bytes_limit) {
+ out->num_large_omap_objects++;
+ }
+}
+
+chunk_result_t validate_chunk(
+ DoutPrefixProvider &dpp,
+ const chunk_validation_policy_t &policy, const scrub_map_set_t &in)
+{
+ chunk_result_t ret;
+
+ const std::set<hobject_t> object_set = get_object_set(in);
+
+ std::list<std::pair<hobject_t, SnapSet>> heads;
+ clone_meta_list_t clones;
+ for (const auto &oid: object_set) {
+ object_evaluation_t eval = evaluate_object(policy, oid, in);
+ add_object_to_stats(policy, eval, &ret.stats);
+ if (eval.inconsistency) {
+ ret.object_errors.push_back(*eval.inconsistency);
+ }
+ if (oid.is_head()) {
+ /* We're only going to consider the head object as "existing" if
+ * evaluate_object was able to find a sensible, authoritative copy
+ * complete with snapset */
+ if (eval.snapset) {
+ heads.emplace_back(oid, *eval.snapset);
+ }
+ } else {
+ /* We're only going to consider the clone object as "existing" if
+ * evaluate_object was able to find a sensible, authoritative copy
+ * complete with an object_info */
+ if (eval.object_info) {
+ clones.emplace_back(oid, *eval.object_info);
+ }
+ }
+ }
+
+ const hobject_t max_oid = hobject_t::get_max();
+ while (heads.size() || clones.size()) {
+ const hobject_t &next_head = heads.size() ? heads.front().first : max_oid;
+ const hobject_t &next_clone = clones.size() ? clones.front().first : max_oid;
+ hobject_t head_to_process = std::min(next_head, next_clone).get_head();
+
+ clone_meta_list_t clones_to_process;
+ auto clone_iter = clones.begin();
+ while (clone_iter != clones.end() && clone_iter->first < head_to_process)
+ ++clone_iter;
+ clones_to_process.splice(
+ clones_to_process.end(), clones, clones.begin(), clone_iter);
+
+ const auto head_meta = [&]() -> std::optional<SnapSet> {
+ if (head_to_process == next_head) {
+ auto ret = std::move(heads.front().second);
+ heads.pop_front();
+ return ret;
+ } else {
+ return std::nullopt;
+ }
+ }();
+
+ if (auto result = evaluate_snapset(
+ dpp, head_to_process, head_meta, clones_to_process); result) {
+ ret.snapset_errors.push_back(*result);
+ }
+ }
+
+ for (const auto &i: ret.object_errors) {
+ ret.stats.num_shallow_scrub_errors +=
+ (i.has_shallow_errors() || i.union_shards.has_shallow_errors());
+ ret.stats.num_deep_scrub_errors +=
+ (i.has_deep_errors() || i.union_shards.has_deep_errors());
+ }
+ ret.stats.num_shallow_scrub_errors += ret.snapset_errors.size();
+ ret.stats.num_scrub_errors = ret.stats.num_shallow_scrub_errors +
+ ret.stats.num_deep_scrub_errors;
+
+ return ret;
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+#include <map>
+
+#include "common/config_proxy.h"
+#include "common/scrub_types.h"
+#include "crimson/common/log.h"
+#include "osd/ECUtil.h"
+#include "osd/osd_types.h"
+
+namespace crimson::osd::scrub {
+
+struct chunk_validation_policy_t {
+ pg_shard_t primary;
+ std::optional<ECUtil::stripe_info_t> stripe_info;
+
+ // osd_max_object_size
+ size_t max_object_size;
+
+ // osd_hit_set_namespace
+ std::string hitset_namespace;
+
+ // osd_deep_scrub_large_omap_object_key_threshold
+ // osd_deep_scrub_large_omap_object_value_sum_threshold
+ uint64_t omap_key_limit;
+ size_t omap_bytes_limit;
+
+
+ bool is_ec() const {
+ return !!stripe_info;
+ }
+
+ size_t logical_to_ondisk_size(size_t size) const {
+ return stripe_info ? stripe_info->logical_to_next_chunk_offset(size) : size;
+ }
+};
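+
+/* A policy is normally filled in from the config options noted on the
+ * members above.  Purely as an illustrative sketch (the concrete values
+ * and the replicated choice are assumptions, not defaults), a policy for
+ * a replicated pool could be aggregate-initialized as:
+ *
+ *   chunk_validation_policy_t policy{
+ *     pg_shard_t{0},    // primary
+ *     std::nullopt,     // stripe_info -- not erasure coded
+ *     128 << 20,        // max_object_size
+ *     ".ceph-internal", // hitset_namespace
+ *     200000,           // omap_key_limit
+ *     1 << 30           // omap_bytes_limit
+ *   };
+ */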
+
+using scrub_map_set_t = std::map<pg_shard_t, ScrubMap>;
+
+struct chunk_result_t {
+ /* Scrub interacts with stats in two ways:
+   * 1. scrub accumulates a subset of object_stat_sum_t members to
+   *    ultimately compare to the object_stat_sum_t value maintained
+   *    by the OSD. These members will be referred to as
+   *    *scrub_checked_stats*.
+   *    See foreach_scrub_checked_stat() for the relevant members.
+   * 2. scrub also updates some members that can't be maintained online
+   *    (like num_omap_*, num_large_omap_objects) or that pertain
+   *    specifically to scrub (like num_shallow_scrub_errors).
+   *    Let these be referred to as *scrub_maintained_stats*.
+   *    See foreach_scrub_maintained_stat() for the relevant members.
+   *
+   * The following stats member contains both, but the two sets are
+   * disjoint and treated separately.
+ */
+ object_stat_sum_t stats;
+
+ // detected errors
+ std::vector<inconsistent_snapset_wrapper> snapset_errors;
+ std::vector<inconsistent_obj_wrapper> object_errors;
+
+ bool has_errors() const {
+ return !snapset_errors.empty() || !object_errors.empty();
+ }
+};
+
+/**
+ * validate_chunk
+ *
+ * Compares shard chunks based on policy and returns a chunk_result_t
+ * containing the results. See chunk_result_t for details.
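+ *
+ * Usage sketch (names are illustrative): given ScrubMaps gathered from
+ * each shard being scrubbed,
+ *
+ *   scrub_map_set_t maps;
+ *   maps[primary_shard] = std::move(primary_map);
+ *   maps[replica_shard] = std::move(replica_map);
+ *   auto result = validate_chunk(dpp, policy, maps);
+ *   if (result.has_errors()) {
+ *     // inspect result.object_errors / result.snapset_errors
+ *   }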
+ */
+chunk_result_t validate_chunk(
+ DoutPrefixProvider &dpp,
+ const chunk_validation_policy_t &policy, const scrub_map_set_t &in);
+
+/**
+ * foreach_scrub_checked_stat
+ *
+ * For each scrub_checked_stat member of object_stat_sum_t, invokes
+ * op with three arguments:
+ * - name of member (string_view)
+ * - pointer to member (T object_stat_sum_t::*)
+ * - function returning whether the corresponding pg_stat_t invalid flag
+ *   is set (bool func(const pg_stat_t &))
+ *
+ * Should be used to perform operations on all scrub_checked_stat members
+ * such as checking the accumulated scrub stats against the maintained
+ * pg stats.
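+ *
+ * For example (sketch, assuming "scrubbed" holds the accumulated scrub
+ * stats and "info" is the pg's pg_stat_t):
+ *
+ *   foreach_scrub_checked_stat(
+ *     [&](std::string_view name, auto member, auto invalid) {
+ *       if (!invalid(info) &&
+ *           scrubbed.*member != info.stats.sum.*member) {
+ *         // handle the mismatch for stat "name"
+ *       }
+ *     });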
+ */
+template <typename Func>
+void foreach_scrub_checked_stat(Func &&op) {
+ using namespace std::string_view_literals;
+ op("num_objects"sv,
+ &object_stat_sum_t::num_objects,
+ [](const pg_stat_t &in) { return false; });
+ op("num_bytes"sv,
+ &object_stat_sum_t::num_bytes,
+ [](const pg_stat_t &in) { return false; });
+ op("num_object_clones"sv,
+ &object_stat_sum_t::num_object_clones,
+ [](const pg_stat_t &in) { return false; });
+ op("num_whiteouts"sv,
+ &object_stat_sum_t::num_whiteouts,
+ [](const pg_stat_t &in) { return false; });
+ op("num_objects_dirty"sv,
+ &object_stat_sum_t::num_objects_dirty,
+ [](const pg_stat_t &in) { return in.dirty_stats_invalid; });
+ op("num_objects_omap"sv,
+ &object_stat_sum_t::num_objects_omap,
+ [](const pg_stat_t &in) { return in.omap_stats_invalid; });
+ op("num_objects_pinned"sv,
+ &object_stat_sum_t::num_objects_pinned,
+ [](const pg_stat_t &in) { return in.pin_stats_invalid; });
+ op("num_objects_hit_set_archive"sv,
+ &object_stat_sum_t::num_objects_hit_set_archive,
+ [](const pg_stat_t &in) { return in.hitset_stats_invalid; });
+ op("num_bytes_hit_set_archive"sv,
+ &object_stat_sum_t::num_bytes_hit_set_archive,
+ [](const pg_stat_t &in) { return in.hitset_bytes_stats_invalid; });
+ op("num_objects_manifest"sv,
+ &object_stat_sum_t::num_objects_manifest,
+ [](const pg_stat_t &in) { return in.manifest_stats_invalid; });
+}
+
+/**
+ * foreach_scrub_maintained_stat
+ *
+ * For each scrub_maintained_stat member of object_stat_sum_t, invokes
+ * op with three arguments:
+ * - name of member (string_view)
+ * - pointer to member (T object_stat_sum_t::*)
+ * - skip for shallow (bool)
+ *
+ * Should be used to perform operations on all scrub_maintained_stat members
+ * such as updating the pg maintained instance once scrub is complete.
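+ *
+ * For example (sketch, assuming "scrubbed" holds the scrub-derived stats
+ * and "sum" is the pg's maintained object_stat_sum_t):
+ *
+ *   foreach_scrub_maintained_stat(
+ *     [&](std::string_view name, auto member, bool skip_for_shallow) {
+ *       if (deep || !skip_for_shallow) {
+ *         sum.*member = scrubbed.*member;
+ *       }
+ *     });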
+ */
+template <typename Func>
+void foreach_scrub_maintained_stat(Func &&op) {
+ using namespace std::string_view_literals;
+ op("num_scrub_errors"sv, &object_stat_sum_t::num_scrub_errors, false);
+ op("num_shallow_scrub_errors"sv,
+ &object_stat_sum_t::num_shallow_scrub_errors,
+ false);
+ op("num_deep_scrub_errors"sv, &object_stat_sum_t::num_deep_scrub_errors, true);
+ op("num_omap_bytes"sv, &object_stat_sum_t::num_omap_bytes, true);
+ op("num_omap_keys"sv, &object_stat_sum_t::num_omap_keys, true);
+ op("num_large_omap_objects"sv,
+ &object_stat_sum_t::num_large_omap_objects,
+ true);
+}
+
+}
+
+template <>
+struct fmt::formatter<crimson::osd::scrub::chunk_result_t> {
+ constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+
+ template <typename FormatContext>
+ auto format(
+ const crimson::osd::scrub::chunk_result_t &result, FormatContext& ctx) const
+ {
+ return fmt::format_to(
+ ctx.out(),
+ "chunk_result_t("
+ "num_scrub_errors: {}, "
+ "num_deep_scrub_errors: {}, "
+ "snapset_errors: [{}], "
+ "object_errors: [{}])",
+ result.stats.num_scrub_errors,
+ result.stats.num_deep_scrub_errors,
+ result.snapset_errors,
+ result.object_errors
+ );
+ }
+};
crimson::gtest)
add_ceph_unittest(unittest-seastar-errorator
--memory 256M --smp 1)
+
+add_executable(unittest-crimson-scrub
+ test_crimson_scrub.cc
+ ${PROJECT_SOURCE_DIR}/src/crimson/osd/scrub/scrub_machine.cc
+ ${PROJECT_SOURCE_DIR}/src/crimson/osd/scrub/scrub_validator.cc
+ ${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc)
+target_link_libraries(
+ unittest-crimson-scrub
+ crimson-common
+ crimson::gtest)
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <functional>
+#include <memory>
+
+#include <boost/iterator/filter_iterator.hpp>
+#include <boost/iterator/transform_iterator.hpp>
+
+#include <fmt/ranges.h>
+
+#include <seastar/core/sleep.hh>
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "include/rados/rados_types.hpp"
+#include "common/scrub_types.h"
+#include "crimson/common/interruptible_future.h"
+#include "crimson/osd/scrub/scrub_machine.h"
+#include "crimson/osd/scrub/scrub_validator.h"
+
+#include "osd/osd_types_fmt.h"
+
+constexpr static size_t TEST_MAX_OBJECT_SIZE = 128<<20;
+constexpr static std::string_view TEST_INTERNAL_NAMESPACE = ".internal";
+constexpr static uint64_t TEST_OMAP_KEY_LIMIT = 200000;
+constexpr static size_t TEST_OMAP_BYTES_LIMIT = 1<<30;
+
+void so_set_attr_len(ScrubMap::object &obj, const std::string &name, size_t len)
+{
+ obj.attrs[name] = buffer::ptr(len);
+}
+
+void so_set_attr(ScrubMap::object &obj, const std::string &name, bufferlist bl)
+{
+ bl.rebuild();
+ obj.attrs[name] = bl.front();
+}
+
+std::optional<bufferlist> so_get_attr(
+ ScrubMap::object &obj, const std::string &name)
+{
+ if (obj.attrs.count(name)) {
+ bufferlist bl;
+ bl.push_back(obj.attrs[name]);
+ return bl;
+ } else {
+ return std::nullopt;
+ }
+}
+
+template <typename T>
+void so_set_attr_type(
+ ScrubMap::object &obj, const std::string &name,
+ const std::optional<T> &v)
+{
+ if (v) {
+ bufferlist bl;
+ encode(*v, bl, CEPH_FEATURES_ALL);
+ so_set_attr(obj, name, std::move(bl));
+ } else {
+ obj.attrs.erase(name);
+ }
+}
+
+template <typename T>
+std::optional<T> so_get_attr_type(ScrubMap::object &obj, const std::string &name)
+{
+ auto maybe_bl = so_get_attr(obj, name);
+ if (!maybe_bl) {
+ return std::nullopt;
+ }
+ auto bl = std::move(*maybe_bl);
+ try {
+ T ret;
+ auto bliter = bl.cbegin();
+ decode(ret, bliter);
+ return ret;
+ } catch (...) {
+ return std::nullopt;
+ }
+}
+
+void so_set_oi(ScrubMap::object &obj, const std::optional<object_info_t> &oi)
+{
+ return so_set_attr_type<object_info_t>(obj, OI_ATTR, oi);
+}
+
+std::optional<object_info_t> so_get_oi(ScrubMap::object &obj)
+{
+ return so_get_attr_type<object_info_t>(obj, OI_ATTR);
+}
+
+template <typename F>
+void so_mut_oi(ScrubMap::object &obj, F &&f) {
+ so_set_oi(obj, std::invoke(std::forward<F>(f), so_get_oi(obj)));
+}
+
+void so_set_ss(ScrubMap::object &obj, const std::optional<SnapSet> &ss)
+{
+ return so_set_attr_type<SnapSet>(obj, SS_ATTR, ss);
+}
+
+std::optional<SnapSet> so_get_ss(ScrubMap::object &obj)
+{
+ return so_get_attr_type<SnapSet>(obj, SS_ATTR);
+}
+
+template <typename F>
+void so_mut_ss(ScrubMap::object &obj, F &&f) {
+ so_set_ss(obj, std::invoke(std::forward<F>(f), so_get_ss(obj)));
+}
+
+void so_set_hinfo(
+ ScrubMap::object &obj, const std::optional<ECUtil::HashInfo> &hinfo)
+{
+ return so_set_attr_type<ECUtil::HashInfo>(obj, ECUtil::get_hinfo_key(), hinfo);
+}
+
+std::optional<ECUtil::HashInfo> so_get_hinfo(ScrubMap::object &obj)
+{
+ return so_get_attr_type<ECUtil::HashInfo>(obj, ECUtil::get_hinfo_key());
+}
+
+template <typename F>
+void so_mut_hinfo(ScrubMap::object &obj, F &&f) {
+ auto maybe_hinfo = so_get_hinfo(obj);
+ auto new_maybe_hinfo = std::invoke(std::forward<F>(f), std::move(maybe_hinfo));
+ so_set_hinfo(obj, new_maybe_hinfo);
+}
+
+/**
+ * so_builder_t
+ *
+ * Utility class for constructing test objects.
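+ *
+ * For example, a 4KiB replicated head object can be built (sketch) as:
+ *
+ *   ScrubMap::object obj =
+ *     so_builder_t::make_head("foo").set_size(4096).get();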
+ */
+struct so_builder_t {
+ ScrubMap::object so;
+
+ void set_defaults() {
+ so.size = 0;
+ so_mut_oi(so, [](auto maybe_oi) {
+ if (maybe_oi) {
+ maybe_oi->size = 0;
+ }
+ return maybe_oi;
+ });
+ }
+
+ static hobject_t make_hoid(std::string name, snapid_t cloneid=CEPH_NOSNAP) {
+ auto oid = object_t(name);
+ return hobject_t{
+ oid,
+ "",
+ cloneid,
+ static_cast<uint32_t>(std::hash<object_t>()(oid)),
+ 1,
+ ""
+ };
+ }
+
+ static so_builder_t make_head(std::string name) {
+ auto hoid = make_hoid(name);
+ so_builder_t ret;
+ so_set_oi(ret.so, object_info_t{hoid});
+ so_set_ss(ret.so, SnapSet{});
+ ret.set_defaults();
+ return ret;
+ }
+
+ static so_builder_t make_clone(
+ std::string name,
+ snapid_t cloneid = 4
+ ) {
+ auto hoid = make_hoid(name, cloneid);
+ so_builder_t ret;
+ so_set_oi(ret.so, object_info_t{hoid});
+ ret.set_defaults();
+ return ret;
+ }
+
+ static so_builder_t make_ec_head(std::string name) {
+ auto ret = make_head(name);
+ so_set_hinfo(ret.so, ECUtil::HashInfo{});
+ return ret;
+ }
+
+ static so_builder_t make_ec_clone(
+ std::string name,
+ snapid_t cloneid = 4
+ ) {
+ auto ret = make_clone(name, cloneid);
+ so_set_hinfo(ret.so, ECUtil::HashInfo{});
+ return ret;
+ }
+
+ so_builder_t &set_size(
+ size_t size,
+ const std::optional<ECUtil::stripe_info_t> stripe_info = std::nullopt) {
+ if (stripe_info) {
+ so.size = stripe_info->logical_to_next_chunk_offset(size);
+ } else {
+ so.size = size;
+ }
+
+ so_mut_oi(so, [size](auto maybe_oi) {
+ if (maybe_oi) {
+ maybe_oi->size = size;
+ }
+ return maybe_oi;
+ });
+ so_mut_hinfo(so, [size, &stripe_info](auto maybe_hinfo) {
+ if (maybe_hinfo) {
+ ceph_assert(stripe_info);
+ maybe_hinfo->set_total_chunk_size_clear_hash(
+ stripe_info->logical_to_next_chunk_offset(size));
+ }
+ return maybe_hinfo;
+ });
+ return *this;
+ }
+
+ so_builder_t &add_attr(const std::string &name, size_t len) {
+ so_set_attr_len(so, name, len);
+ return *this;
+ }
+
+ ScrubMap::object get() const {
+ return so;
+ }
+};
+
+/**
+ * test_obj_t
+ *
+ * test param combining an so_builder_t with a human readable description
+ * and a stripe_info.
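+ *
+ * For example (sketch), an EC head test object could be created as:
+ *
+ *   auto obj = test_obj_t::make_ec_head("healthy_ec_head", "obj0")
+ *     .set_size(1 << 20);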
+ */
+struct test_obj_t : so_builder_t {
+ std::optional<ECUtil::stripe_info_t> stripe_info;
+ std::string desc;
+ hobject_t hoid;
+
+ test_obj_t(
+ so_builder_t _builder,
+ std::optional<ECUtil::stripe_info_t> _stripe_info,
+ std::string _desc,
+ hobject_t _hoid) :
+ so_builder_t(std::move(_builder)),
+ stripe_info(std::move(_stripe_info)),
+ desc(std::move(_desc)),
+ hoid(std::move(_hoid)) {
+ ceph_assert(!desc.empty());
+ }
+
+ static test_obj_t make(
+ const std::string &desc,
+ std::optional<ECUtil::stripe_info_t> stripe_info,
+ so_builder_t builder) {
+ hobject_t hoid = so_get_oi(builder.so)->soid;
+ return test_obj_t{
+ std::move(builder),
+ stripe_info,
+ desc,
+ std::move(hoid)};
+ }
+
+ template <typename... Args>
+ static test_obj_t make_head(const std::string &desc, Args&&... args) {
+ return make(
+ desc,
+ std::nullopt,
+ so_builder_t::make_head(std::forward<Args>(args)...));
+ }
+
+ template <typename... Args>
+ static test_obj_t make_clone(const std::string &desc, Args&&... args) {
+ return make(
+ desc,
+ std::nullopt,
+ so_builder_t::make_clone(std::forward<Args>(args)...));
+ }
+
+ template <typename... Args>
+ static test_obj_t make_ec_head(const std::string &desc, Args&&... args) {
+ return make(
+ desc,
+ ECUtil::stripe_info_t{4, 1<<20},
+ so_builder_t::make_ec_head(std::forward<Args>(args)...));
+ }
+
+ template <typename... Args>
+ static test_obj_t make_ec_clone(const std::string &desc, Args&&... args) {
+ return make(
+ desc,
+ ECUtil::stripe_info_t{4, 1<<20},
+ so_builder_t::make_ec_clone(std::forward<Args>(args)...));
+ }
+
+ test_obj_t &set_size(
+ size_t size) {
+ so_builder_t::set_size(size, stripe_info);
+ return *this;
+ }
+
+ test_obj_t &add_attr(const std::string &name, size_t len) {
+ so_builder_t::add_attr(name, len);
+ return *this;
+ }
+
+ ScrubMap::object get() const {
+ return so_builder_t::get();
+ }
+};
+
+/**
+ * Interface for a test case on a single object.
+ */
+struct SingleErrorTestCase {
+ /// Describes limitations on test preconditions
+ enum class restriction_t {
+    NONE, ///< No limitations
+    REPLICA_ONLY, ///< Only works if injected on replica
+    EC_ONLY, ///< Only valid for ec objects
+    HEAD_ONLY ///< Only valid for head objects
+ };
+
+ /// returns human-readable string describing the test for debugging
+ virtual std::string_view get_description() const = 0;
+
+  /// returns test_obj_t adjusted to form the error-free base object
+ virtual test_obj_t adjust_base_object(test_obj_t ret) const {
+ return ret;
+ }
+
+ /// returns test_obj_t with error injected
+ virtual test_obj_t inject_error(test_obj_t) const = 0;
+
+ /// returns expected shard error
+ virtual librados::err_t get_shard_error_sig() const = 0;
+
+ /// returns expected object error
+ virtual librados::obj_err_t get_object_error_sig() const = 0;
+
+ /// returns true if test should be run with passed restriction
+ virtual bool valid_for_restriction(restriction_t restriction) const = 0;
+
+ virtual ~SingleErrorTestCase() = default;
+};
+
+/// Utility template for implementing SingleErrorTestCase
+template <typename T>
+struct SingleErrorTestCaseT : SingleErrorTestCase {
+  /// Defaults for REQUIRES_EC and REQUIRES_HEAD
+ constexpr static bool REQUIRES_EC = false;
+ constexpr static bool REQUIRES_HEAD = false;
+
+ /* Every implementor must define:
+ constexpr static librados::err_t shard_error_sig{
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ };
+ */
+
+ librados::err_t get_shard_error_sig() const final {
+ return T::shard_error_sig;
+ }
+ librados::obj_err_t get_object_error_sig() const final {
+ return T::object_error_sig;
+ }
+
+ constexpr static bool requires_ec() {
+ return T::REQUIRES_EC;
+ }
+ constexpr static bool requires_head() {
+ return T::REQUIRES_HEAD;
+ }
+ constexpr static bool requires_replica() {
+ /* If there are no shard_errors, we'll take primary to be authoritative. */
+ return T::shard_error_sig.errors == 0;
+ }
+
+ bool valid_for_restriction(restriction_t restriction) const final {
+ // There aren't currently any tests with two restrictions, if this
+ // changes, the suite instantiations will need to change as well.
+ static_assert(
+ (requires_ec() + requires_head() + requires_replica()) <= 1);
+ return [] {
+ if constexpr (requires_replica()) {
+ return restriction_t::REPLICA_ONLY;
+ } else if constexpr (requires_head()) {
+ return restriction_t::HEAD_ONLY;
+ } else if constexpr (requires_ec()) {
+ return restriction_t::EC_ONLY;
+ } else {
+ return restriction_t::NONE;
+ }
+ }() == restriction;
+ }
+ virtual ~SingleErrorTestCaseT() = default;
+};
+
+/* The following classes exercise each possible error code detected
+ * by evaluate_object_shard and compare_candidate_to_authoritative
+ * in crimson/osd/scrub/scrub_validator.*
+ *
+ * Note, any newly added cases must also be added to the test_cases
+ * array below.
+ */
+
+struct ECHashMismatch : SingleErrorTestCaseT<ECHashMismatch> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::SHARD_EC_HASH_MISMATCH
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ };
+
+ std::string_view get_description() const {
+ return "ECHashMismatch";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ obj.so.ec_hash_mismatch = true;
+ return obj;
+ }
+};
+
+struct ECSizeMismatch : SingleErrorTestCaseT<ECSizeMismatch> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::SHARD_EC_SIZE_MISMATCH
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ };
+
+ std::string_view get_description() const {
+ return "ECSizeMismatch";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ obj.so.ec_size_mismatch = true;
+ return obj;
+ }
+};
+
+struct ReadError : SingleErrorTestCaseT<ReadError> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::SHARD_READ_ERR
+ };
+ constexpr static librados::obj_err_t object_error_sig{};
+
+ std::string_view get_description() const {
+ return "ReadError";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ obj.so.read_error = true;
+ return obj;
+ }
+};
+
+struct StatError : SingleErrorTestCaseT<StatError> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::SHARD_STAT_ERR
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ };
+
+ std::string_view get_description() const {
+ return "StatError";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ obj.so.stat_error = true;
+ return obj;
+ }
+};
+
+struct MissingOI : SingleErrorTestCaseT<MissingOI> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::INFO_MISSING
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::OBJECT_INFO_INCONSISTENCY
+ };
+
+ std::string_view get_description() const {
+ return "MissingOI";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ so_mut_oi(obj.so, [](auto) { return std::nullopt; });
+ return obj;
+ }
+};
+
+struct CorruptOI: SingleErrorTestCaseT<CorruptOI> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::INFO_CORRUPTED
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::OBJECT_INFO_INCONSISTENCY
+ };
+
+ std::string_view get_description() const {
+ return "CorruptOI";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ so_set_attr_len(obj.so, OI_ATTR, 10);
+ return obj;
+ }
+};
+
+struct CorruptOndiskSize : SingleErrorTestCaseT<CorruptOndiskSize> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::SIZE_MISMATCH_INFO
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::SIZE_MISMATCH
+ };
+
+ std::string_view get_description() const {
+ return "CorruptOndiskSize";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ obj.so.size += 2;
+ return obj;
+ }
+};
+
+struct MissingSS : SingleErrorTestCaseT<MissingSS> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::SNAPSET_MISSING
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::SNAPSET_INCONSISTENCY
+ };
+ constexpr static bool REQUIRES_HEAD = true;
+
+ std::string_view get_description() const {
+ return "MissingSS";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ ceph_assert(obj.hoid.is_head());
+ so_mut_ss(obj.so, [](auto) { return std::nullopt; });
+ return obj;
+ }
+};
+
+struct CorruptSS : SingleErrorTestCaseT<CorruptSS> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::SNAPSET_CORRUPTED
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::SNAPSET_INCONSISTENCY
+ };
+ constexpr static bool REQUIRES_HEAD = true;
+
+ std::string_view get_description() const {
+ return "CorruptSS";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ ceph_assert(obj.hoid.is_head());
+ so_set_attr_len(obj.so, SS_ATTR, 10);
+ return obj;
+ }
+};
+
+struct MissingHinfo : SingleErrorTestCaseT<MissingHinfo> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::HINFO_MISSING
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::HINFO_INCONSISTENCY
+ };
+ constexpr static bool REQUIRES_EC = true;
+
+ std::string_view get_description() const {
+ return "MissingHinfo";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ ceph_assert(obj.stripe_info);
+ so_mut_hinfo(obj.so, [](auto) { return std::nullopt; });
+ return obj;
+ }
+};
+
+struct CorruptHinfo : SingleErrorTestCaseT<CorruptHinfo> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::HINFO_CORRUPTED
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::HINFO_INCONSISTENCY
+ };
+ constexpr static bool REQUIRES_EC = true;
+
+ std::string_view get_description() const {
+ return "CorruptHinfo";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ ceph_assert(obj.stripe_info);
+ so_set_attr_len(obj.so, ECUtil::get_hinfo_key(), 10);
+ return obj;
+ }
+};
+
+struct DataDigestMismatch : SingleErrorTestCaseT<DataDigestMismatch> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::DATA_DIGEST_MISMATCH_INFO
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::DATA_DIGEST_MISMATCH
+ };
+
+ std::string_view get_description() const {
+ return "DataDigestMismatch";
+ };
+ test_obj_t adjust_base_object(test_obj_t obj) const {
+ so_mut_oi(obj.so, [](auto maybe_oi) {
+ ceph_assert(maybe_oi);
+ maybe_oi->set_data_digest(1);
+ return maybe_oi;
+ });
+ obj.so.digest_present = true;
+ obj.so.digest = 1;
+ return obj;
+ }
+ test_obj_t inject_error(test_obj_t obj) const {
+ ceph_assert(so_get_oi(obj.so)->is_data_digest());
+ obj.so.digest = 2;
+ return obj;
+ }
+};
+
+struct OmapDigestMismatch : SingleErrorTestCaseT<OmapDigestMismatch> {
+ constexpr static librados::err_t shard_error_sig{
+ librados::err_t::OMAP_DIGEST_MISMATCH_INFO
+ };
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::OMAP_DIGEST_MISMATCH
+ };
+
+ std::string_view get_description() const {
+ return "OmapDigestMismatch";
+ };
+ test_obj_t adjust_base_object(test_obj_t obj) const {
+ so_mut_oi(obj.so, [](auto maybe_oi) {
+ ceph_assert(maybe_oi);
+ maybe_oi->set_omap_digest(1);
+ return maybe_oi;
+ });
+ obj.so.omap_digest_present = true;
+ obj.so.omap_digest = 1;
+ return obj;
+ }
+ test_obj_t inject_error(test_obj_t obj) const {
+ ceph_assert(so_get_oi(obj.so)->is_omap_digest());
+ obj.so.omap_digest = 2;
+ return obj;
+ }
+};
+
+struct ExtraAttribute : SingleErrorTestCaseT<ExtraAttribute> {
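+ // No shard-level signature here: an attribute-name mismatch is reported only
+ // at the object level (obj_err_t::ATTR_NAME_MISMATCH), so err_t stays empty.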
+ constexpr static librados::err_t shard_error_sig{};
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::ATTR_NAME_MISMATCH
+ };
+
+ std::string_view get_description() const {
+ return "ExtraAttribute";
+ };
+ test_obj_t inject_error(test_obj_t obj) const {
+ so_set_attr_len(obj.so, "attr_added_erroneously", 10);
+ return obj;
+ }
+};
+
+struct MissingAttribute : SingleErrorTestCaseT<MissingAttribute> {
+ constexpr static librados::err_t shard_error_sig{};
+ constexpr static librados::obj_err_t object_error_sig{
+ librados::obj_err_t::ATTR_NAME_MISMATCH
+ };
+
+ std::string_view get_description() const {
+ return "MissingAttribute";
+ };
+ test_obj_t adjust_base_object(test_obj_t obj) const {
+ so_set_attr_len(obj.so, "attr_to_be_missing", 10);
+ return obj;
+ }
+ test_obj_t inject_error(test_obj_t obj) const {
+ obj.so.attrs.erase("attr_to_be_missing");
+ return obj;
+ }
+};
+
+template <>
+struct fmt::formatter<SingleErrorTestCase> {
+ constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+
+ template <typename FormatContext>
+ auto format(const auto &test_case, FormatContext& ctx) const
+ {
+ return fmt::format_to(
+ ctx.out(), "{}",
+ test_case.get_description());
+ }
+};
+
+std::unique_ptr<SingleErrorTestCase> test_cases[] = {
+ std::make_unique<ECHashMismatch>(),
+ std::make_unique<ECSizeMismatch>(),
+ std::make_unique<ReadError>(),
+ std::make_unique<StatError>(),
+ std::make_unique<MissingOI>(),
+ std::make_unique<CorruptOI>(),
+ std::make_unique<CorruptOndiskSize>(),
+ std::make_unique<MissingSS>(),
+ std::make_unique<CorruptSS>(),
+ std::make_unique<MissingHinfo>(),
+ std::make_unique<CorruptHinfo>(),
+ std::make_unique<DataDigestMismatch>(),
+ std::make_unique<OmapDigestMismatch>(),
+ std::make_unique<ExtraAttribute>(),
+ std::make_unique<MissingAttribute>()
+};
+const SingleErrorTestCase *to_ptr(
+ const std::unique_ptr<SingleErrorTestCase> &tc) {
+ return tc.get();
+}
+// iterator over the above set as pointers
+using test_case_ptr_iter_t = boost::transform_iterator<
+ std::function<decltype(to_ptr)>, decltype(std::begin(test_cases))>;
+template <SingleErrorTestCase::restriction_t restriction>
+struct test_case_filter_t {
+ bool operator()(const SingleErrorTestCase *tc) const {
+ return tc->valid_for_restriction(restriction);
+ }
+};
+// iterator over the above set filtered by restriction
+template <SingleErrorTestCase::restriction_t restriction>
+using test_case_filter_iter_t = boost::filter_iterator<
+ test_case_filter_t<restriction>,
+ test_case_ptr_iter_t>;
+// begin and end, used below to instantiate test suites
+template <SingleErrorTestCase::restriction_t restriction>
+auto test_cases_begin() {
+ return test_case_filter_iter_t<restriction>(
+ test_case_filter_t<restriction>(),
+ test_case_ptr_iter_t(std::begin(test_cases), to_ptr),
+ test_case_ptr_iter_t(std::end(test_cases), to_ptr));
+}
+template <SingleErrorTestCase::restriction_t restriction>
+auto test_cases_end() {
+ return test_case_filter_iter_t<restriction>(
+ test_case_filter_t<restriction>(),
+ test_case_ptr_iter_t(std::end(test_cases), to_ptr),
+ test_case_ptr_iter_t(std::end(test_cases), to_ptr));
+}
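+// For example, assuming valid_for_restriction() accepts the cases flagged for
+// a given restriction, test_cases_begin/end<restriction_t::EC_ONLY> would
+// yield just the EC-specific cases, which the EC-only suite below feeds to
+// ::testing::ValuesIn().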
+
+/// tuple defining each generated test case
+using single_error_test_param_t = std::tuple<
+ test_obj_t, /// initial test object
+ bool, /// inject on primary?
+ const SingleErrorTestCase* /// test case
+ >;
+template <>
+struct fmt::formatter<single_error_test_param_t> {
+ constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+
+ template <typename FormatContext>
+ auto format(const auto &param, FormatContext& ctx) const
+ {
+ const auto &[obj, is_primary, test_case] = param;
+ return fmt::format_to(
+ ctx.out(), "{}{}{}",
+ obj.desc,
+ is_primary ? "Primary" : "Replica",
+ test_case->get_description());
+ }
+};
+std::ostream &operator<<(std::ostream &out, const single_error_test_param_t &p)
+{
+ return out << fmt::format("{}", p);
+}
+
+class TestSingleError :
+ public testing::TestWithParam<single_error_test_param_t> {
+};
+
+/**
+ * compare_error_signatures
+ *
+ * Generic helper for comparing err_t, obj_err_t, and
+ * inconsistent_snapset_t with descriptive output.
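+ *
+ * Returns a ::testing::AssertionResult so the result can be passed straight
+ * to EXPECT_TRUE()/ASSERT_TRUE().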
+ */
+auto compare_error_signatures(const auto &lh, const auto &rh)
+{
+ if (lh.errors == rh.errors) {
+ return ::testing::AssertionSuccess() << fmt::format(
+ "Signature match: {}", lh);
+ } else {
+ return ::testing::AssertionFailure() << fmt::format(
+ "Signature mismatch: {} should be {}",
+ lh, rh);
+ }
+}
+
+TEST_P(TestSingleError, SingleError) {
+ const auto &[_obj, is_primary, test_case] = GetParam();
+ auto obj = test_case->adjust_base_object(_obj);
+
+ const pg_shard_t primary(0, shard_id_t::NO_SHARD);
+ const pg_shard_t replica(1, shard_id_t::NO_SHARD);
+ crimson::osd::scrub::chunk_validation_policy_t policy {
+ primary,
+ obj.stripe_info,
+ TEST_MAX_OBJECT_SIZE,
+ std::string{TEST_INTERNAL_NAMESPACE},
+ TEST_OMAP_KEY_LIMIT,
+ TEST_OMAP_BYTES_LIMIT
+ };
+ const pg_shard_t &target = is_primary ? primary : replica;
+ const std::vector<pg_shard_t> shards = {
+ primary, replica
+ };
+
+ auto with_error = test_case->inject_error(obj);
+ crimson::osd::scrub::scrub_map_set_t maps;
+ for (const auto &osd : shards) {
+ if (osd == target) {
+ maps[osd].objects[obj.hoid] = with_error.get();
+ } else {
+ maps[osd].objects[obj.hoid] = obj.get();
+ }
+ }
+
+ DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub");
+ const auto ret = crimson::osd::scrub::validate_chunk(
+ dpp, policy, maps);
+ const auto &object_errors = ret.object_errors;
+
+ ASSERT_EQ(object_errors.size(), 1) << fmt::format(
+ "{}: generated an incorrect number of errors: {}\n",
+ *test_case, object_errors);
+
+ auto &obj_error = object_errors.front();
+
+ EXPECT_EQ(
+ ret.stats.num_shallow_scrub_errors,
+ (obj_error.has_shallow_errors() ||
+ obj_error.union_shards.has_shallow_errors()) +
+ ret.snapset_errors.size());
+ EXPECT_EQ(
+ ret.stats.num_deep_scrub_errors,
+ (obj_error.has_deep_errors() ||
+ obj_error.union_shards.has_deep_errors()));
+
+ EXPECT_TRUE(compare_error_signatures(
+ static_cast<const librados::obj_err_t&>(obj_error),
+ test_case->get_object_error_sig()));
+
+ EXPECT_EQ(obj_error.shards.size(), shards.size());
+ bool found_selected_oi = false;
+ for (const auto &shard : shards) {
+ auto siter = obj_error.shards.find(
+ librados::osd_shard_t(shard.osd, shard.shard)
+ );
+ if (siter == obj_error.shards.end()) {
+ EXPECT_NE(siter, obj_error.shards.end());
+ continue;
+ }
+ if (shard == target) {
+ EXPECT_TRUE(compare_error_signatures(
+ static_cast<const librados::err_t&>(siter->second),
+ test_case->get_shard_error_sig()));
+ } else {
+ EXPECT_FALSE(siter->second.has_errors());
+ if (siter->second.selected_oi) found_selected_oi = true;
+ }
+ if (shard == primary) {
+ EXPECT_TRUE(siter->second.primary);
+ }
+ }
+ EXPECT_TRUE(found_selected_oi);
+}
+
+/* Tests that don't have restrictions */
+INSTANTIATE_TEST_SUITE_P(
+ SingleErrorGeneral,
+ TestSingleError,
+ ::testing::Combine(
+ ::testing::Values(
+ test_obj_t::make_head("Small", "foo").set_size(64),
+ test_obj_t::make_clone("EmptyWithAttr", "foo2").add_attr("extra_attr", 64),
+ test_obj_t::make_head("ReplicatedRBD", "foo2").set_size(4<<20),
+ test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20),
+ test_obj_t::make_ec_clone("LargeECClone", "foo").set_size(16<<20)
+ ),
+ ::testing::Bool(),
+ ::testing::ValuesIn(
+ test_cases_begin<SingleErrorTestCase::restriction_t::NONE>(),
+ test_cases_end<SingleErrorTestCase::restriction_t::NONE>())
+ ),
+ [](const auto &info) {
+ return fmt::format("{}", info.param);
+ }
+);
+
+/* Some test cases don't trigger shard-level errors, so the validator cannot
+ * tell which shard is at fault. Those cases only run with the error injected
+ * on the replica. */
+INSTANTIATE_TEST_SUITE_P(
+ SingleErrorReplicaOnly,
+ TestSingleError,
+ ::testing::Combine(
+ ::testing::Values(
+ test_obj_t::make_head("Small", "foo").set_size(64),
+ test_obj_t::make_clone("EmptyWithAttr", "foo2").add_attr("extra_attr", 64),
+ test_obj_t::make_head("ReplicatedRBD", "foo2").set_size(4<<20),
+ test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20),
+ test_obj_t::make_ec_clone("LargeECClone", "foo").set_size(16<<20)
+ ),
+ ::testing::Values(false), // replica only
+ ::testing::ValuesIn(
+ test_cases_begin<SingleErrorTestCase::restriction_t::REPLICA_ONLY>(),
+ test_cases_end<SingleErrorTestCase::restriction_t::REPLICA_ONLY>())
+ ),
+ [](const auto &info) {
+ return fmt::format("{}", info.param);
+ }
+);
+
+/* Some tests only make sense on EC objects. */
+INSTANTIATE_TEST_SUITE_P(
+ SingleErrorECOnly,
+ TestSingleError,
+ ::testing::Combine(
+ ::testing::Values(
+ test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20),
+ test_obj_t::make_ec_clone("LargeECClone", "foo").set_size(16<<20)
+ ),
+ ::testing::Bool(),
+ ::testing::ValuesIn(
+ test_cases_begin<SingleErrorTestCase::restriction_t::EC_ONLY>(),
+ test_cases_end<SingleErrorTestCase::restriction_t::EC_ONLY>())
+ ),
+ [](const auto &info) {
+ return fmt::format("{}", info.param);
+ }
+);
+
+/* Some tests only make sense on head objects. */
+INSTANTIATE_TEST_SUITE_P(
+ SingleErrorHEAD,
+ TestSingleError,
+ ::testing::Combine(
+ ::testing::Values(
+ test_obj_t::make_head("Small", "foo").set_size(64),
+ test_obj_t::make_head("ReplicatedRBD", "foo2").set_size(4<<20),
+ test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20)
+ ),
+ ::testing::Bool(),
+ ::testing::ValuesIn(
+ test_cases_begin<SingleErrorTestCase::restriction_t::HEAD_ONLY>(),
+ test_cases_end<SingleErrorTestCase::restriction_t::HEAD_ONLY>())
+ ),
+ [](const auto &info) {
+ return fmt::format("{}", info.param);
+ }
+);
+
+using test_clone_spec_t = std::pair<
+ snapid_t, // clone id
+ size_t // clone size
+ >;
+
+/// ascending order of clone id
+using test_clone_list_t = std::vector<test_clone_spec_t>;
+
+/**
+ * snapset_test_case_t
+ *
+ * This descriptor can express three types of error:
+ * - missing clone
+ * - extra clone
+ * - clone size mismatch
+ * at any of four clone positions, using one bit per (type, position) pair.
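+ *
+ * Layout (see should_inject()): bits 0-3 flag missing clones, bits 4-7 extra
+ * clones, and bits 8-11 size mismatches. For example, signature 0x021 encodes
+ * a missing clone at position 0 plus an extra clone at position 1.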
+ */
+class snapset_test_case_t {
+ uint32_t signature;
+
+ snapset_test_case_t(uint32_t signature) : signature(signature) {}
+
+ constexpr static uint32_t POSITION_BITS = 4;
+ constexpr static uint32_t position_mask[] = {
+ 0x1, 0x2, 0x4, 0x8
+ };
+ constexpr static unsigned MAX_POS = std::size(position_mask);
+
+ constexpr static uint32_t MIN_VALID = 0;
+ constexpr static uint32_t MAX_VALID = 0xFFF;
+ enum type_t {
+ MISSING = 0,
+ EXTRA,
+ SIZE
+ };
+
+ bool should_inject(type_t type, unsigned position) const {
+ ceph_assert(position < MAX_POS);
+ return (signature >> (type * POSITION_BITS)) & position_mask[position];
+ }
+ static snapset_test_case_t make(type_t type, unsigned position) {
+ ceph_assert(position < std::size(position_mask));
+ return snapset_test_case_t{
+ position_mask[position] << (type * POSITION_BITS)
+ };
+ }
+ static auto generate_single_errors(type_t type) {
+ std::vector<snapset_test_case_t> ret;
+ ret.reserve(std::size(position_mask));
+ for (unsigned i = 0; i < MAX_POS; ++i) {
+ ret.push_back(make(type, i));
+ }
+ return ret;
+ }
+
+public:
+ constexpr static unsigned get_max_pos() { return MAX_POS; }
+
+ bool should_inject_missing(unsigned position) const {
+ return should_inject(MISSING, position);
+ }
+ bool should_inject_extra(unsigned position) const {
+ return should_inject(EXTRA, position);
+ }
+ bool should_inject_size(unsigned position) const {
+ return should_inject(SIZE, position);
+ }
+
+ static auto generate_single_missing_errors() {
+ return generate_single_errors(MISSING);
+ }
+ static auto generate_single_extra_errors() {
+ return generate_single_errors(EXTRA);
+ }
+ static auto generate_single_size_errors() {
+ return generate_single_errors(SIZE);
+ }
+ static auto generate_random_errors(size_t num, int seed = 0) {
+ std::default_random_engine e1(seed);
+ std::uniform_int_distribution<uint32_t> uniform_dist(1, MAX_VALID);
+
+ std::vector<snapset_test_case_t> ret;
+ ret.reserve(num);
+ for (unsigned i = 0; i < num; ++i) {
+ ret.push_back(snapset_test_case_t{uniform_dist(e1)});
+ }
+ return ret;
+ }
+ friend std::ostream &operator<<(std::ostream &out, snapset_test_case_t rhs);
+};
+std::ostream &operator<<(std::ostream &out, snapset_test_case_t rhs) {
+ for (auto &[s, type] :
+ std::vector<std::pair<std::string, snapset_test_case_t::type_t>>(
+ {{"M", snapset_test_case_t::MISSING},
+ {"E", snapset_test_case_t::EXTRA},
+ {"S", snapset_test_case_t::SIZE}})) {
+ out << s;
+ for (unsigned i = 0;
+ i < snapset_test_case_t::MAX_POS; ++i) {
+ if (rhs.should_inject(type, i)) {
+ out << i;
+ }
+ }
+ }
+ return out;
+}
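+// e.g. a case with a missing clone at position 0 and an extra clone at
+// position 1 (signature 0x021) prints as "M0E1S".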
+
+class TestSnapSetCloneError :
+ public testing::TestWithParam<snapset_test_case_t> {
+};
+
+
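+// Builds a SnapSet mirroring clone_list: each clone gets a single snap equal
+// to its id, the listed size, and an (empty) clone_overlap entry.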
+SnapSet make_snapset(const test_clone_list_t &clone_list)
+{
+ SnapSet ss;
+ for (const auto &[cloneid, size] : clone_list) {
+ ss.clones.push_back(cloneid);
+ ss.clone_size[cloneid] = size;
+ ss.clone_overlap[cloneid];
+ ss.clone_snaps[cloneid].push_back(cloneid);
+ }
+ return ss;
+}
+
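+// Builds the (hobject_t, ScrubMap::object) pair for a clone of the given id
+// and size, as it would appear in a scanned ScrubMap.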
+std::pair<hobject_t, ScrubMap::object> make_clone(
+ std::string name, std::pair<snapid_t, size_t> in)
+{
+ auto [cloneid, size] = in;
+ hobject_t hoid = so_builder_t::make_hoid(name, cloneid);
+ auto so = so_builder_t::make_clone(name, cloneid);
+ so.set_size(size);
+ return std::make_pair(hoid, so.get());
+}
+
+TEST_P(TestSnapSetCloneError, CloneError) {
+ const pg_shard_t primary(0, shard_id_t::NO_SHARD);
+ crimson::osd::scrub::chunk_validation_policy_t policy {
+ primary,
+ std::nullopt,
+ TEST_MAX_OBJECT_SIZE,
+ std::string{TEST_INTERNAL_NAMESPACE},
+ TEST_OMAP_KEY_LIMIT,
+ TEST_OMAP_BYTES_LIMIT
+ };
+
+ crimson::osd::scrub::scrub_map_set_t maps;
+ const std::string name = "test_obj";
+ auto &map = maps[primary];
+ inconsistent_snapset_wrapper expected_error;
+
+ test_clone_list_t should_exist = {
+ { 10, 32 }, { 25, 64 }, { 50, 32 }, { 100, 64 }
+ };
+ test_clone_list_t extra = {
+ { 9, 64 }, { 11, 32 }, { 99, 64 }, { 101, 32 }
+ };
+
+ for (unsigned i = 0; i < snapset_test_case_t::get_max_pos(); ++i) {
+ if (!GetParam().should_inject_missing(i)) {
+ auto to_insert = make_clone(name, should_exist[i]);
+ if (GetParam().should_inject_size(i)) {
+ expected_error.set_size_mismatch();
+ to_insert.second = so_builder_t(to_insert.second).set_size(
+ so_get_oi(to_insert.second)->size + 1).get();
+ }
+ map.objects.insert(to_insert);
+ } else {
+ expected_error.set_clone_missing(should_exist[i].first);
+ }
+ if (GetParam().should_inject_extra(i)) {
+ map.objects.insert(make_clone(name, extra[i]));
+ expected_error.set_clone(extra[i].first);
+ }
+ }
+
+ hobject_t hoid = so_builder_t::make_hoid(name);
+ map.objects[hoid] = so_builder_t::make_head(name).get();
+
+ so_set_ss(map.objects[hoid], make_snapset(should_exist));
+
+ DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub");
+ const auto ret = crimson::osd::scrub::validate_chunk(
+ dpp, policy, maps);
+ EXPECT_EQ(ret.object_errors.size(), 0);
+ ASSERT_EQ(ret.snapset_errors.size(), 1) << fmt::format(
+ "Got snapset_errors: {}", ret.snapset_errors);
+
+ EXPECT_TRUE(compare_error_signatures(
+ ret.snapset_errors.front(),
+ expected_error));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ SingleMissing,
+ TestSnapSetCloneError,
+ ::testing::ValuesIn(snapset_test_case_t::generate_single_missing_errors())
+);
+
+INSTANTIATE_TEST_SUITE_P(
+ SingleExtra,
+ TestSnapSetCloneError,
+ ::testing::ValuesIn(snapset_test_case_t::generate_single_extra_errors())
+);
+
+INSTANTIATE_TEST_SUITE_P(
+ SingleSize,
+ TestSnapSetCloneError,
+ ::testing::ValuesIn(snapset_test_case_t::generate_single_size_errors())
+);
+
+INSTANTIATE_TEST_SUITE_P(
+ MultipleRandom,
+ TestSnapSetCloneError,
+ ::testing::ValuesIn(snapset_test_case_t::generate_random_errors(100))
+);
+
+TEST(TestSnapSet, MissingHead) {
+ const pg_shard_t primary(0, shard_id_t::NO_SHARD);
+ crimson::osd::scrub::chunk_validation_policy_t policy {
+ primary,
+ std::nullopt,
+ TEST_MAX_OBJECT_SIZE,
+ std::string{TEST_INTERNAL_NAMESPACE},
+ TEST_OMAP_KEY_LIMIT,
+ TEST_OMAP_BYTES_LIMIT
+ };
+
+ crimson::osd::scrub::scrub_map_set_t maps;
+ inconsistent_snapset_wrapper expected_error;
+
+ test_clone_list_t clones = {
+ { 10, 64 }, { 25, 32 }, { 50, 64 }, { 100, 32 }
+ };
+ for (const auto &desc : clones) {
+ maps[primary].objects.emplace(make_clone("test_object", desc));
+ }
+ expected_error.set_headless();
+
+ DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub");
+ const auto ret = crimson::osd::scrub::validate_chunk(
+ dpp, policy, maps);
+ EXPECT_EQ(ret.object_errors.size(), 0);
+ ASSERT_EQ(ret.snapset_errors.size(), 1) << fmt::format(
+ "Got snapset_errors: {}", ret.snapset_errors);
+
+ EXPECT_TRUE(compare_error_signatures(
+ ret.snapset_errors.front(),
+ expected_error));
+}
+
+TEST(TestSnapSet, Stats) {
+ const pg_shard_t primary(0, shard_id_t::NO_SHARD);
+ crimson::osd::scrub::chunk_validation_policy_t policy {
+ primary,
+ std::nullopt,
+ TEST_MAX_OBJECT_SIZE,
+ std::string{TEST_INTERNAL_NAMESPACE},
+ TEST_OMAP_KEY_LIMIT,
+ TEST_OMAP_BYTES_LIMIT
+ };
+
+ object_stat_sum_t expected_stats;
+ crimson::osd::scrub::scrub_map_set_t maps;
+ auto &objs = maps[primary].objects;
+
+ unsigned num = 0;
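+ // Adds a head object of the given size, lets f adjust its object_info, and
+ // accumulates the expected num_bytes/num_objects contributions.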
+ auto add_simple_head = [&](size_t size, auto &&f)
+ -> ScrubMap::object & {
+ auto name = fmt::format("obj-{}", ++num);
+ auto hoid = so_builder_t::make_hoid(name);
+ auto obj = so_builder_t::make_head(name).set_size(size).get();
+ so_mut_oi(obj, std::forward<decltype(f)>(f));
+ expected_stats.num_bytes += size;
+ expected_stats.num_objects++;
+ return objs[hoid] = obj;
+ };
+
+ add_simple_head(64, [&expected_stats](auto maybe_oi) {
+ ceph_assert(maybe_oi);
+ maybe_oi->set_flag(object_info_t::FLAG_DIRTY);
+ expected_stats.num_objects_dirty++;
+ return maybe_oi;
+ });
+
+ add_simple_head(128, [&expected_stats](auto maybe_oi) {
+ ceph_assert(maybe_oi);
+ maybe_oi->set_flag(object_info_t::FLAG_MANIFEST);
+ expected_stats.num_objects_manifest++;
+ return maybe_oi;
+ });
+
+ add_simple_head(0, [&expected_stats](auto maybe_oi) {
+ ceph_assert(maybe_oi);
+ maybe_oi->set_flag(object_info_t::FLAG_WHITEOUT);
+ expected_stats.num_whiteouts++;
+ return maybe_oi;
+ });
+
+ {
+ auto &so = add_simple_head(32, [](auto ret) { return ret; });
+ expected_stats.num_omap_keys += (so.object_omap_keys = 10);
+ expected_stats.num_omap_bytes += (so.object_omap_bytes = 100);
+ expected_stats.num_objects_omap++;
+ }
+
+ {
+ auto &so = add_simple_head(64, [](auto ret) { return ret; });
+ expected_stats.num_omap_keys +=
+ (so.object_omap_keys = (TEST_OMAP_KEY_LIMIT + 1));
+ expected_stats.num_omap_bytes +=
+ (so.object_omap_bytes = so.object_omap_keys);
+ expected_stats.num_objects_omap++;
+ expected_stats.num_large_omap_objects++;
+ }
+
+ {
+ auto &so = add_simple_head(64, [](auto ret) { return ret; });
+ expected_stats.num_omap_keys += (so.object_omap_keys = 1);
+ expected_stats.num_omap_bytes +=
+ (so.object_omap_bytes = (TEST_OMAP_BYTES_LIMIT + 1));
+ expected_stats.num_objects_omap++;
+ expected_stats.num_large_omap_objects++;
+ }
+
+ {
+ auto name = fmt::format("obj-{}", ++num);
+
+ std::map<snapid_t, interval_set<uint64_t>> clone_overlap;
+ test_clone_list_t clones;
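+ // Registers a clone; only the bytes not covered by its clone_overlap are
+ // expected to count towards num_bytes.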
+ auto add_clone = [&](std::pair<snapid_t, size_t> clone_desc,
+ interval_set<uint64_t> overlap) -> ScrubMap::object & {
+ auto hoid = so_builder_t::make_hoid(name, clone_desc.first);
+ clones.push_back(clone_desc);
+ auto [_, obj] = make_clone(name, clone_desc);
+ expected_stats.num_object_clones++;
+ expected_stats.num_objects++;
+
+ expected_stats.num_bytes += clone_desc.second - overlap.size();
+ clone_overlap[clone_desc.first] = std::move(overlap);
+
+ return objs[hoid] = obj;
+ };
+
+ auto make_is = [](uint64_t off, uint64_t len) {
+ interval_set<uint64_t> ret;
+ ret.insert(off, len);
+ return ret;
+ };
+
+ add_clone({99, 32}, {});
+ add_clone({100, 64}, make_is(31, 33));
+
+ {
+ auto hoid = so_builder_t::make_hoid(name);
+ size_t size = 64;
+ auto obj = so_builder_t::make_head(name).set_size(size).get();
+ expected_stats.num_bytes += size;
+ expected_stats.num_objects++;
+
+ SnapSet ss = make_snapset(clones);
+ ss.clone_overlap = std::move(clone_overlap);
+ so_mut_ss(obj, [ss=std::move(ss)](auto) mutable {
+ return std::move(ss);
+ });
+
+ objs[hoid] = obj;
+ }
+ }
+
+ DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub");
+ const auto ret = crimson::osd::scrub::validate_chunk(
+ dpp, policy, maps);
+ EXPECT_EQ(ret.object_errors.size(), 0);
+ ASSERT_EQ(ret.snapset_errors.size(), 0) << fmt::format(
+ "Got snapset_errors: {}", ret.snapset_errors);
+
+ EXPECT_EQ(ret.stats, expected_stats);
+}