]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd/scrub: add scrub_machine and scrub_validator along with tests
authorSamuel Just <sjust@redhat.com>
Fri, 11 Aug 2023 21:03:48 +0000 (14:03 -0700)
committerSamuel Just <sjust@redhat.com>
Mon, 11 Dec 2023 04:10:18 +0000 (04:10 +0000)
scrub_validator.h/cc contain the logic from translating a set of ScrubMaps
into per-chunk stats and errors.

scrub_machine.h/cc contain the logic for the overall scrub lifecycle and
reservation management.

ScrubContext (scrub_machine.h) is the interface through which the above
logic performs reservations and scans.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/crimson/CMakeLists.txt
src/crimson/osd/CMakeLists.txt
src/crimson/osd/scrub/scrub_machine.cc [new file with mode: 0644]
src/crimson/osd/scrub/scrub_machine.h [new file with mode: 0644]
src/crimson/osd/scrub/scrub_validator.cc [new file with mode: 0644]
src/crimson/osd/scrub/scrub_validator.h [new file with mode: 0644]
src/test/crimson/CMakeLists.txt
src/test/crimson/test_crimson_scrub.cc [new file with mode: 0644]

index 9e751fcebc91401481abd24b96a540e29275f50d..510ffbd9df996f0d24cb96a0d111578cedc86eb7 100644 (file)
@@ -121,6 +121,7 @@ add_library(crimson-common STATIC
   ${PROJECT_SOURCE_DIR}/src/osd/HitSet.cc
   ${PROJECT_SOURCE_DIR}/src/osd/OSDMap.cc
   ${PROJECT_SOURCE_DIR}/src/osd/PGPeeringEvent.cc
+  ${PROJECT_SOURCE_DIR}/src/common/scrub_types.cc
   ${PROJECT_SOURCE_DIR}/src/xxHash/xxhash.c
   ${crimson_common_srcs}
   $<TARGET_OBJECTS:common_mountcephfs_objs>
index 817027c18ffcdfef15ad2b7c03de4744193a06bf..e3ab3cf4d735e19b53eb35ec0ca5c0ed27065a4a 100644 (file)
@@ -33,6 +33,8 @@ add_executable(crimson-osd
   replicated_recovery_backend.cc
   scheduler/scheduler.cc
   scheduler/mclock_scheduler.cc
+  scrub/scrub_machine.cc
+  scrub/scrub_validator.cc
   osdmap_gate.cc
   pg_activation_blocker.cc
   pg_map.cc
@@ -40,6 +42,7 @@ add_executable(crimson-osd
   objclass.cc
   ${PROJECT_SOURCE_DIR}/src/objclass/class_api.cc
   ${PROJECT_SOURCE_DIR}/src/osd/ClassHandler.cc
+  ${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc
   ${PROJECT_SOURCE_DIR}/src/osd/osd_op_util.cc
   ${PROJECT_SOURCE_DIR}/src/osd/OSDCap.cc
   ${PROJECT_SOURCE_DIR}/src/osd/PeeringState.cc
diff --git a/src/crimson/osd/scrub/scrub_machine.cc b/src/crimson/osd/scrub/scrub_machine.cc
new file mode 100644 (file)
index 0000000..7d674fe
--- /dev/null
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/ceph_assert.h"
+
+#include "crimson/osd/scrub/scrub_machine.h"
+
+namespace crimson::osd::scrub {
+
+WaitUpdate::WaitUpdate(my_context ctx) : ScrubState(ctx)
+{
+  auto &cs = context<ChunkState>();
+  cs.range_reserved = true;
+  assert(cs.range);
+  get_scrub_context().reserve_range(cs.range->start, cs.range->end);
+}
+
+ScanRange::ScanRange(my_context ctx) : ScrubState(ctx)
+{
+  ceph_assert(context<ChunkState>().range);
+  const auto &cs = context<ChunkState>();
+  const auto &range = cs.range.value();
+  get_scrub_context(
+  ).foreach_id_to_scrub([this, &range, &cs](const auto &id) {
+    get_scrub_context().scan_range(
+      id, cs.version,
+      context<Scrubbing>().deep,
+      range.start, range.end);
+    waiting_on++;
+  });
+}
+
+sc::result ScanRange::react(const ScrubContext::scan_range_complete_t &event)
+{
+  auto [_, inserted] = maps.insert(event.value.to_pair());
+  ceph_assert(inserted);
+  ceph_assert(waiting_on > 0);
+  --waiting_on;
+
+  if (waiting_on > 0) {
+    return discard_event();
+  } else {
+    ceph_assert(context<ChunkState>().range);
+    {
+      auto results = validate_chunk(
+       get_scrub_context().get_dpp(),
+       context<Scrubbing>().policy,
+       maps);
+      context<Scrubbing>().stats.add(results.stats);
+      get_scrub_context().emit_chunk_result(
+       *(context<ChunkState>().range),
+       std::move(results));
+    }
+    if (context<ChunkState>().range->end.is_max()) {
+      get_scrub_context().emit_scrub_result(
+       context<Scrubbing>().deep,
+       context<Scrubbing>().stats);
+      return transit<PrimaryActive>();
+    } else {
+      context<Scrubbing>().advance_current(
+       context<ChunkState>().range->end);
+      return transit<ChunkState>();
+    }
+  }
+}
+
+ReplicaScanChunk::ReplicaScanChunk(my_context ctx) : ScrubState(ctx)
+{
+  auto &to_scan = context<ReplicaChunkState>().to_scan;
+  get_scrub_context().generate_and_submit_chunk_result(
+    to_scan.start,
+    to_scan.end,
+    to_scan.deep);
+}
+
+};
diff --git a/src/crimson/osd/scrub/scrub_machine.h b/src/crimson/osd/scrub/scrub_machine.h
new file mode 100644 (file)
index 0000000..d2d127a
--- /dev/null
@@ -0,0 +1,613 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+#include <ranges>
+
+#include <boost/statechart/custom_reaction.hpp>
+#include <boost/statechart/deferral.hpp>
+#include <boost/statechart/event.hpp>
+#include <boost/statechart/event_base.hpp>
+#include <boost/statechart/in_state_reaction.hpp>
+#include <boost/statechart/simple_state.hpp>
+#include <boost/statechart/state.hpp>
+#include <boost/statechart/state_machine.hpp>
+#include <boost/statechart/transition.hpp>
+
+#include "common/fmt_common.h"
+#include "common/hobject.h"
+#include "common/hobject_fmt.h"
+#include "crimson/common/log.h"
+#include "osd/osd_types_fmt.h"
+#include "scrub_validator.h"
+
+namespace crimson::osd::scrub {
+
+/* Development Notes
+ *
+ * Notes:
+ * - We're leaving out all of the throttle waits.  We actually want to handle
+ *   that using crimson's operation throttler machinery.
+ *
+ * TODOs:
+ * - Leaving SnapMapper validation to later work
+ *   - Note, each replica should validate and repair locally as the SnapMapper
+ *     is meant to be a local index of the local object contents
+ * - Leaving preemption for later
+ * - Leaving scheduling for later, for now the only way to trigger a scrub
+ *   is via the ceph tell <pgid> [deep_]scrub command
+ */
+
+namespace sc = boost::statechart;
+
+template <typename T>
+struct simple_event_t : sc::event<T> {
+  template <typename FormatContext>
+  auto fmt_print_ctx(FormatContext & ctx) const {
+    return fmt::format_to(ctx.out(), "{}", T::event_name);
+  }
+};
+
+template <typename T, has_formatter V>
+struct value_event_t : sc::event<T> {
+  const V value;
+
+  template <typename... Args>
+  value_event_t(Args&&... args) : value(std::forward<Args>(args)...) {}
+
+  value_event_t(const value_event_t &) = default;
+  value_event_t(value_event_t &&) = default;
+  value_event_t &operator=(const value_event_t&) = default;
+  value_event_t &operator=(value_event_t&&) = default;
+
+  template <typename FormatContext>
+  auto fmt_print_ctx(FormatContext & ctx) const {
+    return fmt::format_to(ctx.out(), "{}", T::event_name);
+  }
+};
+
+
+#define SIMPLE_EVENT(T) struct T : simple_event_t<T> {                 \
+    static constexpr const char * event_name = #T;                     \
+  };
+
+#define VALUE_EVENT(T, V) struct T : value_event_t<T, V> {             \
+    static constexpr const char * event_name = #T;                     \
+                                                                       \
+    template <typename... Args>                                                \
+    T(Args&&... args) : value_event_t(                                 \
+      std::forward<Args>(args)...) {}                                  \
+  };
+
+/**
+ * ScrubContext
+ *
+ * Interface to external PG/OSD/IO machinery.
+ *
+ * Methods which may take time return immediately and define an event which
+ * will be asynchronously delivered to the state machine with the result.  This
+ * is a bit clumsy to use, but should render this component highly testable.
+ *
+ * Events sent as a completion to a ScrubContext interface method are defined
+ * within ScrubContext.  Other events are defined within ScrubMachine.
+ */
+struct ScrubContext {
+  /// return ids to scrub
+  virtual const std::set<pg_shard_t> &get_ids_to_scrub() const = 0;
+
+  /// iterates over each pg_shard_t to scrub
+  template <typename F>
+  void foreach_id_to_scrub(F &&f) {
+    for (const auto &id : get_ids_to_scrub()) {
+      std::invoke(f, id);
+    }
+  }
+
+  /// return struct defining chunk validation rules
+  virtual chunk_validation_policy_t get_policy() const = 0;
+
+  /// notifies implementation of scrub start
+  virtual void notify_scrub_start(bool deep) = 0;
+
+  /// notifies implementation of scrub end
+  virtual void notify_scrub_end(bool deep) = 0;
+
+  /// requests range to scrub starting at start
+  struct request_range_result_t {
+    hobject_t start;
+    hobject_t end;
+
+    request_range_result_t(
+      const hobject_t &start,
+      const hobject_t &end) : start(start), end(end) {}
+
+    auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+      return fmt::format_to(ctx.out(), "start: {}, end: {}", start, end);
+    }
+  };
+  VALUE_EVENT(request_range_complete_t, request_range_result_t);
+  virtual void request_range(
+    const hobject_t &start) = 0;
+
+  /// reserves range [start, end)
+  VALUE_EVENT(reserve_range_complete_t, eversion_t);
+  virtual void reserve_range(
+    const hobject_t &start,
+    const hobject_t &end) = 0;
+
+  /// waits until implementation has committed up to version
+  SIMPLE_EVENT(await_update_complete_t);
+  virtual bool await_update(
+    const eversion_t &version) = 0;
+
+  /// cancel in progress or currently reserved range
+  virtual void release_range() = 0;
+
+  /// scans [begin, end) on target as of version
+  struct scan_range_value_t {
+    pg_shard_t from;
+    ScrubMap map;
+
+    template <typename Map>
+    scan_range_value_t(
+      pg_shard_t from,
+      Map &&map) : from(from), map(std::forward<Map>(map)) {}
+
+    auto to_pair() const { return std::make_pair(from, map); }
+    auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+      return fmt::format_to(ctx.out(), "from: {}", from);
+    }
+  };
+  VALUE_EVENT(scan_range_complete_t, scan_range_value_t);
+  virtual void scan_range(
+    pg_shard_t target,
+    eversion_t version,
+    bool deep,
+    const hobject_t &start,
+    const hobject_t &end) = 0;
+
+  /// instructs implmentatino to scan [begin, end) and emit result to primary
+  SIMPLE_EVENT(generate_and_submit_chunk_result_complete_t);
+  virtual void generate_and_submit_chunk_result(
+    const hobject_t &begin,
+    const hobject_t &end,
+    bool deep) = 0;
+
+  /// notifies implementation of chunk scrub results
+  virtual void emit_chunk_result(
+    const request_range_result_t &range,
+    chunk_result_t &&result) = 0;
+
+  /// notifies implementation of full scrub results
+  virtual void emit_scrub_result(
+    bool deep,
+    object_stat_sum_t scrub_stats) = 0;
+
+  /// get dpp instance for logging
+  virtual DoutPrefixProvider &get_dpp() = 0;
+};
+
+struct Crash;
+struct Inactive;
+
+namespace events {
+/// reset ScrubMachine
+SIMPLE_EVENT(reset_t);
+
+/// start (deep) scrub
+struct start_scrub_event_t {
+  bool deep = false;
+
+  start_scrub_event_t(bool deep) : deep(deep) {}
+
+  auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+    return fmt::format_to(ctx.out(), "deep: {}", deep);
+  }
+};
+VALUE_EVENT(start_scrub_t, start_scrub_event_t);
+
+/// notifies ScrubMachine about a write on oid resulting in delta_stats
+struct op_stat_event_t {
+  hobject_t oid;
+  object_stat_sum_t delta_stats;
+
+  op_stat_event_t(
+    hobject_t oid,
+    object_stat_sum_t delta_stats) : oid(oid), delta_stats(delta_stats) {}
+
+  auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+    return fmt::format_to(ctx.out(), "oid: {}", oid);
+  }
+};
+VALUE_EVENT(op_stats_t, op_stat_event_t);
+
+/// Prepares statemachine for primary events
+SIMPLE_EVENT(primary_activate_t);
+
+/// Prepares statemachine for replica events
+SIMPLE_EVENT(replica_activate_t);
+
+/// Instructs replica to (deep) scrub [start, end) as of version version
+struct replica_scan_event_t {
+  hobject_t start;
+  hobject_t end;
+  eversion_t version;
+  bool deep = false;
+
+  replica_scan_event_t() = default;
+
+  replica_scan_event_t(
+    hobject_t start,
+    hobject_t end,
+    eversion_t version,
+    bool deep) : start(start), end(end), version(version), deep(deep) {}
+
+  auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) {
+    return fmt::format_to(
+      ctx.out(), "start: {}, end: {}, version: {}, deep: {}",
+      start, end, version, deep);
+  }
+};
+VALUE_EVENT(replica_scan_t, replica_scan_event_t);
+
+}
+
+
+/**
+ * ScrubMachine
+ *
+ * Manages orchestration of rados's distributed scrub process.
+ *
+ * There are two general ways in which ScrubMachine may need to release
+ * resources:
+ * - interval_change_t -- represents case where PG as a whole undergoes
+ *   a distributed mapping change.  Distributed resources are released
+ *   implicitly as remote PG instances receive the new map.  Local
+ *   resources are still released by ScrubMachine via ScrubContext methods
+ *   generally via state destructors
+ * - otherwise, ScrubMachine is responsible for notifying remote PG
+ *   instances via the appropriate ScrubContext methods again generally
+ *   from state destructors.
+ *
+ * TODO: interval_change_t will be added with remote reservations.
+ */
+class ScrubMachine
+  : public sc::state_machine<ScrubMachine, Inactive> {
+public:
+  static constexpr std::string_view full_name = "ScrubMachine";
+
+  ScrubContext &context;
+  ScrubMachine(ScrubContext &context) : context(context) {}
+};
+
+/**
+ * ScrubState
+ *
+ * Template defining machinery/state common to all scrub state machine
+ * states.
+ */
+template <typename S, typename P, typename... T>
+struct ScrubState : sc::state<S, P, T...> {
+  using sc_base = sc::state<S, P, T...>;
+  DoutPrefixProvider &dpp;
+
+  /* machinery for populating a full_name member for each ScrubState with
+   * ScrubMachine/.../ParentState/ChildState full_name */
+  template <std::string_view const &PN, typename PI,
+           std::string_view const &CN, typename CI>
+  struct concat;
+
+  template <std::string_view const &PN, std::size_t... PI,
+           std::string_view const &CN, std::size_t... CI>
+  struct concat<PN, std::index_sequence<PI...>, CN, std::index_sequence<CI...>> {
+    static constexpr size_t value_size = PN.size() + CN.size() + 1;
+    static constexpr const char value[value_size]{PN[PI]..., '/', CN[CI]...};
+  };
+
+  template <std::string_view const &PN, std::string_view const &CN>
+  struct join {
+    using conc = concat<
+      PN, std::make_index_sequence<PN.size()>,
+      CN, std::make_index_sequence<CN.size()>>;
+    static constexpr std::string_view value{
+      conc::value,
+      conc::value_size
+    };
+  };
+
+  /// Populated with ScrubMachine/.../Parent/Child for each state Child
+  static constexpr std::string_view full_name =
+    join<P::full_name, S::state_name>::value;
+
+  template <typename C>
+  explicit ScrubState(C ctx) : sc_base(ctx), dpp(get_scrub_context().get_dpp()) {
+    LOG_PREFIX(ScrubState::ScrubState);
+    SUBDEBUGDPP(osd, "entering state {}", dpp, full_name);
+  }
+
+  ~ScrubState() {
+    LOG_PREFIX(ScrubState::~ScrubState);
+    SUBDEBUGDPP(osd, "exiting state {}", dpp, full_name);
+  }
+
+  auto &get_scrub_context() {
+    return sc_base::template context<ScrubMachine>().context;
+  }
+};
+
+struct Crash : ScrubState<Crash, ScrubMachine> {
+  static constexpr std::string_view state_name = "Crash";
+  explicit Crash(my_context ctx) : ScrubState(ctx) {
+    ceph_abort("Crash state impossible");
+  }
+
+};
+
+struct PrimaryActive;
+struct ReplicaActive;
+struct Inactive : ScrubState<Inactive, ScrubMachine> {
+  static constexpr std::string_view state_name = "Inactive";
+  explicit Inactive(my_context ctx) : ScrubState(ctx) {}
+
+  using reactions = boost::mpl::list<
+    sc::transition<events::primary_activate_t, PrimaryActive>,
+    sc::transition<events::replica_activate_t, ReplicaActive>,
+    sc::custom_reaction<events::reset_t>,
+    sc::custom_reaction<events::start_scrub_t>,
+    sc::custom_reaction<events::op_stats_t>,
+    sc::transition< boost::statechart::event_base, Crash >
+    >;
+
+  sc::result react(const events::reset_t &) {
+    return discard_event();
+  }
+  sc::result react(const events::start_scrub_t &) {
+    return discard_event();
+  }
+  sc::result react(const events::op_stats_t &) {
+    return discard_event();
+  }
+};
+
+struct AwaitScrub;
+struct PrimaryActive : ScrubState<PrimaryActive, ScrubMachine, AwaitScrub> {
+  static constexpr std::string_view state_name = "PrimaryActive";
+  explicit PrimaryActive(my_context ctx) : ScrubState(ctx) {}
+
+  bool local_reservation_held = false;
+  std::set<pg_shard_t> remote_reservations_held;
+
+  using reactions = boost::mpl::list<
+    sc::transition<events::reset_t, Inactive>,
+    sc::custom_reaction<events::start_scrub_t>,
+    sc::custom_reaction<events::op_stats_t>,
+    sc::transition< boost::statechart::event_base, Crash >
+    >;
+
+  sc::result react(const events::start_scrub_t &event) {
+    return discard_event();
+  }
+
+  sc::result react(const events::op_stats_t &) {
+    return discard_event();
+  }
+};
+
+namespace internal_events {
+VALUE_EVENT(set_deep_t, bool);
+}
+
+struct Scrubbing;
+struct AwaitScrub : ScrubState<AwaitScrub, PrimaryActive> {
+  static constexpr std::string_view state_name = "AwaitScrub";
+  explicit AwaitScrub(my_context ctx) : ScrubState(ctx) {}
+
+  using reactions = boost::mpl::list<
+    sc::custom_reaction<events::start_scrub_t>
+    >;
+
+  sc::result react(const events::start_scrub_t &event) {
+    post_event(internal_events::set_deep_t{event.value.deep});
+    return transit<Scrubbing>();
+  }
+};
+
+struct ChunkState;
+struct Scrubbing : ScrubState<Scrubbing, PrimaryActive, ChunkState> {
+  static constexpr std::string_view state_name = "Scrubbing";
+  explicit Scrubbing(my_context ctx)
+    : ScrubState(ctx), policy(get_scrub_context().get_policy()) {}
+
+
+  using reactions = boost::mpl::list<
+    sc::custom_reaction<internal_events::set_deep_t>,
+    sc::custom_reaction<events::op_stats_t>
+    >;
+
+  chunk_validation_policy_t policy;
+
+  /// hobjects < current have been scrubbed
+  hobject_t current;
+
+  /// true for deep scrub
+  bool deep = false;
+
+  /// stats for objects < current, maintained via events::op_stats_t
+  object_stat_sum_t stats;
+
+  void advance_current(const hobject_t &next) {
+    current = next;
+  }
+
+  sc::result react(const internal_events::set_deep_t &event) {
+    deep = event.value;
+    get_scrub_context().notify_scrub_start(deep);
+    return discard_event();
+  }
+
+  void exit() {
+    get_scrub_context().notify_scrub_end(deep);
+  }
+
+  sc::result react(const events::op_stats_t &event) {
+    if (event.value.oid < current) {
+      stats.add(event.value.delta_stats);
+    }
+    return discard_event();
+  }
+};
+
+struct GetRange;
+struct ChunkState : ScrubState<ChunkState, Scrubbing, GetRange> {
+  static constexpr std::string_view state_name = "ChunkState";
+  explicit ChunkState(my_context ctx) : ScrubState(ctx) {}
+
+  /// Current chunk includes objects in [range_start, range_end)
+  boost::optional<ScrubContext::request_range_result_t> range;
+
+  /// true once we have requested that the range be reserved
+  bool range_reserved = false;
+
+  /// version of last update for the reserved chunk
+  eversion_t version;
+
+  void exit() {
+    if (range_reserved) {
+      get_scrub_context().release_range();
+    }
+  }
+};
+
+struct WaitUpdate;
+struct GetRange : ScrubState<GetRange, ChunkState> {
+  static constexpr std::string_view state_name = "GetRange";
+  explicit GetRange(my_context ctx) : ScrubState(ctx) {
+    get_scrub_context().request_range(context<Scrubbing>().current);
+  }
+
+  using reactions = boost::mpl::list<
+    sc::custom_reaction<ScrubContext::request_range_complete_t>
+    >;
+
+  sc::result react(const ScrubContext::request_range_complete_t &event) {
+    context<ChunkState>().range = event.value;
+    return transit<WaitUpdate>();
+  }
+};
+
+struct ScanRange;
+struct WaitUpdate : ScrubState<WaitUpdate, ChunkState> {
+  static constexpr std::string_view state_name = "WaitUpdate";
+  explicit WaitUpdate(my_context ctx);
+
+  using reactions = boost::mpl::list<
+    sc::custom_reaction<ScrubContext::reserve_range_complete_t>
+    >;
+
+  sc::result react(const ScrubContext::reserve_range_complete_t &e) {
+    context<ChunkState>().version = e.value;
+    return transit<ScanRange>();
+  }
+};
+
+struct ScanRange : ScrubState<ScanRange, ChunkState> {
+  static constexpr std::string_view state_name = "ScanRange";
+  explicit ScanRange(my_context ctx);
+
+  scrub_map_set_t maps;
+  unsigned waiting_on = 0;
+
+  using reactions = boost::mpl::list<
+    sc::custom_reaction<ScrubContext::scan_range_complete_t>
+    >;
+
+  sc::result react(const ScrubContext::scan_range_complete_t &);
+};
+
+struct ReplicaIdle;
+struct ReplicaActive :
+    ScrubState<ReplicaActive, ScrubMachine, ReplicaIdle> {
+  static constexpr std::string_view state_name = "ReplicaActive";
+  explicit ReplicaActive(my_context ctx) : ScrubState(ctx) {}
+
+  using reactions = boost::mpl::list<
+    sc::transition<events::reset_t, Inactive>,
+    sc::custom_reaction<events::start_scrub_t>,
+    sc::custom_reaction<events::op_stats_t>,
+    sc::transition< boost::statechart::event_base, Crash >
+    >;
+
+  sc::result react(const events::start_scrub_t &) {
+    return discard_event();
+  }
+
+  sc::result react(const events::op_stats_t &) {
+    return discard_event();
+  }
+};
+
+struct ReplicaChunkState;
+struct ReplicaIdle : ScrubState<ReplicaIdle, ReplicaActive> {
+  static constexpr std::string_view state_name = "ReplicaIdle";
+  explicit ReplicaIdle(my_context ctx) : ScrubState(ctx) {}
+
+  using reactions = boost::mpl::list<
+    sc::custom_reaction<events::replica_scan_t>
+    >;
+
+  sc::result react(const events::replica_scan_t &event) {
+    LOG_PREFIX(ScrubState::ReplicaIdle::react(events::replica_scan_t));
+    SUBDEBUGDPP(osd, "event.value: {}", get_scrub_context().get_dpp(), event.value);
+    post_event(event);
+    return transit<ReplicaChunkState>();
+  }
+};
+
+struct ReplicaWaitUpdate;
+struct ReplicaChunkState : ScrubState<ReplicaChunkState, ReplicaActive, ReplicaWaitUpdate> {
+  static constexpr std::string_view state_name = "ReplicaChunkState";
+  explicit ReplicaChunkState(my_context ctx) : ScrubState(ctx) {}
+
+  using reactions = boost::mpl::list<
+    sc::custom_reaction<events::replica_scan_t>
+    >;
+
+  events::replica_scan_event_t to_scan;
+
+  sc::result react(const events::replica_scan_t &event) {
+    LOG_PREFIX(ScrubState::ReplicaWaitUpdate::react(events::replica_scan_t));
+    SUBDEBUGDPP(osd, "event.value: {}", get_scrub_context().get_dpp(), event.value);
+    to_scan = event.value;
+    if (get_scrub_context().await_update(event.value.version)) {
+      post_event(ScrubContext::await_update_complete_t{});
+    }
+    return discard_event();
+  }
+};
+
+struct ReplicaScanChunk;
+struct ReplicaWaitUpdate : ScrubState<ReplicaWaitUpdate, ReplicaChunkState> {
+  static constexpr std::string_view state_name = "ReplicaWaitUpdate";
+  explicit ReplicaWaitUpdate(my_context ctx) : ScrubState(ctx) {}
+
+  using reactions = boost::mpl::list<
+    sc::transition<ScrubContext::await_update_complete_t, ReplicaScanChunk>
+    >;
+};
+
+struct ReplicaScanChunk : ScrubState<ReplicaScanChunk, ReplicaChunkState> {
+  static constexpr std::string_view state_name = "ReplicaScanChunk";
+  explicit ReplicaScanChunk(my_context ctx);
+
+  using reactions = boost::mpl::list<
+    sc::transition<ScrubContext::generate_and_submit_chunk_result_complete_t,
+                  ReplicaIdle>
+    >;
+};
+
+#undef SIMPLE_EVENT
+#undef VALUE_EVENT
+
+}
diff --git a/src/crimson/osd/scrub/scrub_validator.cc b/src/crimson/osd/scrub/scrub_validator.cc
new file mode 100644 (file)
index 0000000..b7dcc46
--- /dev/null
@@ -0,0 +1,502 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <ranges>
+
+#include "osd/osd_types_fmt.h"
+
+#include "crimson/common/log.h"
+#include "crimson/osd/scrub/scrub_validator.h"
+#include "osd/ECUtil.h"
+
+SET_SUBSYS(osd);
+
+namespace crimson::osd::scrub {
+
+using object_set_t = std::set<hobject_t>;
+object_set_t get_object_set(const scrub_map_set_t &in)
+{
+  object_set_t ret;
+  for (const auto& [from, map] : in) {
+    std::transform(map.objects.begin(), map.objects.end(),
+                   std::inserter(ret, ret.end()),
+                   [](const auto& i) { return i.first; });
+  }
+  return ret;
+}
+
+struct shard_evaluation_t {
+  pg_shard_t source;
+  shard_info_wrapper shard_info;
+
+  std::optional<object_info_t> object_info;
+  std::optional<SnapSet> snapset;
+  std::optional<ECUtil::HashInfo> hinfo;
+
+  size_t omap_keys{0};
+  size_t omap_bytes{0};
+
+  bool has_errors() const {
+    return shard_info.has_errors();
+  }
+
+  bool is_primary() const {
+    return shard_info.primary;
+  }
+
+  std::weak_ordering operator<=>(const shard_evaluation_t &rhs) const {
+    return std::make_tuple(!has_errors(), is_primary()) <=>
+      std::make_tuple(!rhs.has_errors(), rhs.is_primary());
+  }
+};
+shard_evaluation_t evaluate_object_shard(
+  const chunk_validation_policy_t &policy,
+  const hobject_t &oid,
+  pg_shard_t from,
+  const ScrubMap::object *maybe_obj)
+{
+  shard_evaluation_t ret;
+  ret.source = from;
+  if (from == policy.primary) {
+    ret.shard_info.primary = true;
+  }
+  if (!maybe_obj || maybe_obj->negative) {
+    // impossible since chunky scrub was introduced
+    ceph_assert(!maybe_obj->negative);
+    ret.shard_info.set_missing();
+    return ret;
+  }
+
+  auto &obj = *maybe_obj;
+  /* We are ignoring ScrubMap::object::large_omap_object*, object_omap_* is all the
+   * info we need */
+  ret.omap_keys = obj.object_omap_keys;
+  ret.omap_bytes = obj.object_omap_bytes;
+
+  ret.shard_info.set_object(obj);
+
+  if (obj.ec_hash_mismatch) {
+    ret.shard_info.set_ec_hash_mismatch();
+  }
+
+  if (obj.ec_size_mismatch) {
+    ret.shard_info.set_ec_size_mismatch();
+  }
+
+  if (obj.read_error) {
+    ret.shard_info.set_read_error();
+  }
+
+  if (obj.stat_error) {
+    ret.shard_info.set_stat_error();
+  }
+
+  {
+    auto xiter = obj.attrs.find(OI_ATTR);
+    if (xiter == obj.attrs.end()) {
+      ret.shard_info.set_info_missing();
+    } else {
+      bufferlist bl;
+      bl.push_back(xiter->second);
+      ret.object_info = object_info_t{};
+      try {
+       auto bliter = bl.cbegin();
+       ::decode(*(ret.object_info), bliter);
+      } catch (...) {
+       ret.shard_info.set_info_corrupted();
+       ret.object_info = std::nullopt;
+      }
+    }
+  }
+
+  ret.shard_info.size = obj.size;
+  if (ret.object_info &&
+      obj.size != policy.logical_to_ondisk_size(ret.object_info->size)) {
+    ret.shard_info.set_size_mismatch_info();
+  }
+
+  if (oid.is_head()) {
+    auto xiter = obj.attrs.find(SS_ATTR);
+    if (xiter == obj.attrs.end()) {
+      ret.shard_info.set_snapset_missing();
+    } else {
+      bufferlist bl;
+      bl.push_back(xiter->second);
+      ret.snapset = SnapSet{};
+      try {
+       auto bliter = bl.cbegin();
+       ::decode(*(ret.snapset), bliter);
+      } catch (...) {
+       ret.shard_info.set_snapset_corrupted();
+       ret.snapset = std::nullopt;
+      }
+    }
+  }
+
+  if (policy.is_ec()) {
+    auto xiter = obj.attrs.find(ECUtil::get_hinfo_key());
+    if (xiter == obj.attrs.end()) {
+      ret.shard_info.set_hinfo_missing();
+    } else {
+      bufferlist bl;
+      bl.push_back(xiter->second);
+      ret.hinfo = ECUtil::HashInfo{};
+      try {
+       auto bliter = bl.cbegin();
+       decode(*(ret.hinfo), bliter);
+      } catch (...) {
+       ret.shard_info.set_hinfo_corrupted();
+       ret.hinfo = std::nullopt;
+      }
+    }
+  }
+
+  if (ret.object_info) {
+    if (ret.shard_info.data_digest_present &&
+       ret.object_info->is_data_digest() &&
+       (ret.object_info->data_digest != ret.shard_info.data_digest)) {
+      ret.shard_info.set_data_digest_mismatch_info();
+    }
+    if (ret.shard_info.omap_digest_present &&
+       ret.object_info->is_omap_digest() &&
+       (ret.object_info->omap_digest != ret.shard_info.omap_digest)) {
+      ret.shard_info.set_omap_digest_mismatch_info();
+    }
+  }
+
+  return ret;
+}
+
+librados::obj_err_t compare_candidate_to_authoritative(
+  const chunk_validation_policy_t &policy,
+  const hobject_t &oid,
+  const shard_evaluation_t &auth,
+  const shard_evaluation_t &cand)
+{
+  using namespace librados;
+  obj_err_t ret;
+
+  if (cand.shard_info.has_shard_missing()) {
+    return ret;
+  }
+
+  const auto &auth_si = auth.shard_info;
+  const auto &cand_si = cand.shard_info;
+
+  if (auth_si.data_digest != cand_si.data_digest) {
+    ret.errors |= obj_err_t::DATA_DIGEST_MISMATCH;
+  }
+
+  if (auth_si.omap_digest != cand_si.omap_digest) {
+    ret.errors |= obj_err_t::OMAP_DIGEST_MISMATCH;
+  }
+
+  {
+    auto aiter = auth_si.attrs.find(OI_ATTR);
+    ceph_assert(aiter != auth_si.attrs.end());
+
+    auto citer = cand_si.attrs.find(OI_ATTR);
+    if (citer == cand_si.attrs.end() ||
+       !aiter->second.contents_equal(citer->second)) {
+      ret.errors |= obj_err_t::OBJECT_INFO_INCONSISTENCY;
+    }
+  }
+
+  if (oid.is_head()) {
+    auto aiter = auth_si.attrs.find(SS_ATTR);
+    ceph_assert(aiter != auth_si.attrs.end());
+
+    auto citer = cand_si.attrs.find(SS_ATTR);
+    if (citer == cand_si.attrs.end() ||
+       !aiter->second.contents_equal(citer->second)) {
+      ret.errors |= obj_err_t::SNAPSET_INCONSISTENCY;
+    }
+  }
+
+  if (policy.is_ec()) {
+    auto aiter = auth_si.attrs.find(ECUtil::get_hinfo_key());
+    ceph_assert(aiter != auth_si.attrs.end());
+
+    auto citer = cand_si.attrs.find(ECUtil::get_hinfo_key());
+    if (citer == cand_si.attrs.end() ||
+       !aiter->second.contents_equal(citer->second)) {
+      ret.errors |= obj_err_t::HINFO_INCONSISTENCY;
+    }
+  }
+
+  if (auth_si.size != cand_si.size) {
+    ret.errors |= obj_err_t::SIZE_MISMATCH;
+  }
+
+  auto is_sys_attr = [&policy](const auto &str) {
+    return str == OI_ATTR || str == SS_ATTR ||
+      (policy.is_ec() && str == ECUtil::get_hinfo_key());
+  };
+  for (auto aiter = auth_si.attrs.begin(); aiter != auth_si.attrs.end(); ++aiter) {
+    if (is_sys_attr(aiter->first)) continue;
+
+    auto citer = cand_si.attrs.find(aiter->first);
+    if (citer == cand_si.attrs.end()) {
+      ret.errors |= obj_err_t::ATTR_NAME_MISMATCH;
+    } else if (!aiter->second.contents_equal(citer->second)) {
+      ret.errors |= obj_err_t::ATTR_VALUE_MISMATCH;
+    }
+  }
+  if (std::any_of(
+       cand_si.attrs.begin(), cand_si.attrs.end(),
+       [&is_sys_attr, &auth_si](auto &p) {
+         return !is_sys_attr(p.first) &&
+           auth_si.attrs.find(p.first) == auth_si.attrs.end();
+       })) {
+    ret.errors |= obj_err_t::ATTR_NAME_MISMATCH;
+  }
+
+  return ret;
+}
+
+struct object_evaluation_t {
+  std::optional<inconsistent_obj_wrapper> inconsistency;
+  std::optional<object_info_t> object_info;
+  std::optional<SnapSet> snapset;
+
+  size_t omap_keys{0};
+  size_t omap_bytes{0};
+};
+object_evaluation_t evaluate_object(
+  const chunk_validation_policy_t &policy,
+  const hobject_t &hoid,
+  const scrub_map_set_t &maps)
+{
+  ceph_assert(maps.size() > 0);
+  using evaluation_vec_t = std::vector<shard_evaluation_t>;
+  evaluation_vec_t shards;
+  std::transform(
+    maps.begin(), maps.end(),
+    std::inserter(shards, shards.end()),
+    [&hoid, &policy](const auto &item) -> evaluation_vec_t::value_type {
+      const auto &[shard, scrub_map] = item;
+      auto miter = scrub_map.objects.find(hoid);
+      auto maybe_shard = miter == scrub_map.objects.end() ?
+       nullptr : &(miter->second);
+      return evaluate_object_shard(policy, hoid, shard, maybe_shard);
+    });
+
+  std::sort(shards.begin(), shards.end());
+
+  auto &auth_eval = shards.back();
+
+  object_evaluation_t ret;
+  inconsistent_obj_wrapper iow{hoid};
+  if (!auth_eval.has_errors()) {
+    ret.object_info = auth_eval.object_info;
+    ret.omap_keys = auth_eval.omap_keys;
+    ret.omap_bytes = auth_eval.omap_bytes;
+    ret.snapset = auth_eval.snapset;
+    if (auth_eval.object_info->size > policy.max_object_size) {
+      iow.set_size_too_large();
+    }
+    auth_eval.shard_info.selected_oi = true;
+    std::for_each(
+      shards.begin(), shards.end() - 1,
+      [&policy, &hoid, &auth_eval, &iow](auto &cand_eval) {
+       auto err = compare_candidate_to_authoritative(
+         policy, hoid, auth_eval, cand_eval);
+       iow.merge(err);
+      });
+  }
+
+  if (iow.errors ||
+      std::any_of(shards.begin(), shards.end(),
+                 [](auto &cand) { return cand.has_errors(); })) {
+    for (auto &eval : shards) {
+      iow.shards.emplace(
+       librados::osd_shard_t{eval.source.osd, eval.source.shard},
+       eval.shard_info);
+      iow.union_shards.errors |= eval.shard_info.errors;
+    }
+    if (auth_eval.object_info) {
+      iow.version = auth_eval.object_info->version.version;
+    }
+    ret.inconsistency = iow;
+  }
+  return ret;
+}
+
+using clone_meta_list_t = std::list<std::pair<hobject_t, object_info_t>>;
+std::optional<inconsistent_snapset_wrapper> evaluate_snapset(
+  DoutPrefixProvider &dpp,
+  const hobject_t &hoid,
+  const std::optional<SnapSet> &maybe_snapset,
+  const clone_meta_list_t &clones)
+{
+  LOG_PREFIX(evaluate_snapset);
+  /* inconsistent_snapset_t has several error codes that seem to pertain to
+   * specific objects rather than to the snapset specifically.  I'm choosing
+   * to ignore those for now */
+  inconsistent_snapset_wrapper ret{hoid};
+  if (!maybe_snapset) {
+    ret.set_headless();
+    return ret;
+  }
+  const auto &snapset = *maybe_snapset;
+
+  auto clone_iter = clones.begin();
+  for (auto ss_clone_id : snapset.clones) {
+    for (; clone_iter != clones.end() &&
+          clone_iter->first.snap < ss_clone_id;
+        ++clone_iter) {
+      ret.set_clone(clone_iter->first.snap);
+    }
+
+    if (clone_iter != clones.end() &&
+       clone_iter->first.snap == ss_clone_id) {
+      auto ss_clone_size_iter = snapset.clone_size.find(ss_clone_id);
+      if (ss_clone_size_iter == snapset.clone_size.end() ||
+         ss_clone_size_iter->second != clone_iter->second.size) {
+       ret.set_size_mismatch();
+      }
+      ++clone_iter;
+    } else {
+      ret.set_clone_missing(ss_clone_id);
+    }
+  }
+
+  for (; clone_iter != clones.end(); ++clone_iter) {
+    ret.set_clone(clone_iter->first.snap);
+  }
+
+  if (ret.errors) {
+    DEBUGDPP(
+      "snapset {}, clones {}",
+      dpp, snapset, clones);
+    return ret;
+  } else {
+    return std::nullopt;
+  }
+}
+
+void add_object_to_stats(
+  const chunk_validation_policy_t &policy,
+  const object_evaluation_t &eval,
+  object_stat_sum_t *out)
+{
+  auto &ss = eval.snapset;
+  if (!eval.object_info) {
+    return;
+  }
+  auto &oi = *eval.object_info;
+  ceph_assert(out);
+  out->num_objects++;
+  if (ss) {
+    out->num_bytes += oi.size;
+    for (auto clone : ss->clones) {
+      out->num_bytes += ss->get_clone_bytes(clone);
+      out->num_object_clones++;
+    }
+    if (oi.is_whiteout()) {
+      out->num_whiteouts++;
+    }
+  }
+  if (oi.is_dirty()) {
+    out->num_objects_dirty++;
+  }
+  if (oi.is_cache_pinned()) {
+    out->num_objects_pinned++;
+  }
+  if (oi.has_manifest()) {
+    out->num_objects_manifest++;
+  }
+
+  if (eval.omap_keys > 0) {
+    out->num_objects_omap++;
+  }
+  out->num_omap_keys += eval.omap_keys;
+  out->num_omap_bytes += eval.omap_bytes;
+
+  if (oi.soid.nspace == policy.hitset_namespace) {
+    out->num_objects_hit_set_archive++;
+    out->num_bytes_hit_set_archive += oi.size;
+  }
+
+  if (eval.omap_keys > policy.omap_key_limit ||
+      eval.omap_bytes > policy.omap_bytes_limit) {
+    out->num_large_omap_objects++;
+  }
+}
+
+chunk_result_t validate_chunk(
+  DoutPrefixProvider &dpp,
+  const chunk_validation_policy_t &policy, const scrub_map_set_t &in)
+{
+  chunk_result_t ret;
+
+  const std::set<hobject_t> object_set = get_object_set(in);
+
+  std::list<std::pair<hobject_t, SnapSet>> heads;
+  clone_meta_list_t clones;
+  for (const auto &oid: object_set) {
+    object_evaluation_t eval = evaluate_object(policy, oid, in);
+    add_object_to_stats(policy, eval, &ret.stats);
+    if (eval.inconsistency) {
+      ret.object_errors.push_back(*eval.inconsistency);
+    }
+    if (oid.is_head()) {
+      /* We're only going to consider the head object as "existing" if
+       * evaluate_object was able to find a sensible, authoritative copy
+       * complete with snapset */
+      if (eval.snapset) {
+       heads.emplace_back(oid, *eval.snapset);
+      }
+    } else {
+      /* We're only going to consider the clone object as "existing" if
+       * evaluate_object was able to find a sensible, authoritative copy
+       * complete with an object_info */
+      if (eval.object_info) {
+       clones.emplace_back(oid, *eval.object_info);
+      }
+    }
+  }
+
+  const hobject_t max_oid = hobject_t::get_max();
+  while (heads.size() || clones.size()) {
+    const hobject_t &next_head = heads.size() ? heads.front().first : max_oid;
+    const hobject_t &next_clone = clones.size() ? clones.front().first : max_oid;
+    hobject_t head_to_process = std::min(next_head, next_clone).get_head();
+
+    clone_meta_list_t clones_to_process;
+    auto clone_iter = clones.begin();
+    while (clone_iter != clones.end() && clone_iter->first < head_to_process)
+      ++clone_iter;
+    clones_to_process.splice(
+      clones_to_process.end(), clones, clones.begin(), clone_iter);
+
+    const auto head_meta = [&]() -> std::optional<SnapSet> {
+      if (head_to_process == next_head) {
+       auto ret = std::move(heads.front().second);
+       heads.pop_front();
+       return ret;
+      } else {
+       return std::nullopt;
+      }
+    }();
+
+    if (auto result = evaluate_snapset(
+         dpp, head_to_process, head_meta, clones_to_process); result) {
+      ret.snapset_errors.push_back(*result);
+    }
+  }
+
+  for (const auto &i: ret.object_errors) {
+    ret.stats.num_shallow_scrub_errors +=
+      (i.has_shallow_errors() || i.union_shards.has_shallow_errors());
+    ret.stats.num_deep_scrub_errors +=
+      (i.has_deep_errors() || i.union_shards.has_deep_errors());
+  }
+  ret.stats.num_shallow_scrub_errors += ret.snapset_errors.size();
+  ret.stats.num_scrub_errors = ret.stats.num_shallow_scrub_errors +
+    ret.stats.num_deep_scrub_errors;
+
+  return ret;
+}
+
+}
diff --git a/src/crimson/osd/scrub/scrub_validator.h b/src/crimson/osd/scrub/scrub_validator.h
new file mode 100644 (file)
index 0000000..32f5933
--- /dev/null
@@ -0,0 +1,180 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+#include <map>
+
+#include "common/config_proxy.h"
+#include "common/scrub_types.h"
+#include "crimson/common/log.h"
+#include "osd/ECUtil.h"
+#include "osd/osd_types.h"
+
+namespace crimson::osd::scrub {
+
+struct chunk_validation_policy_t {
+  pg_shard_t primary;
+  std::optional<ECUtil::stripe_info_t> stripe_info;
+
+  // osd_max_object_size
+  size_t max_object_size;
+
+  // osd_hit_set_namespace
+  std::string hitset_namespace;
+
+  // osd_deep_scrub_large_omap_object_key_threshold
+  // osd_deep_scrub_large_omap_object_value_sum_threshold
+  uint64_t omap_key_limit;
+  size_t omap_bytes_limit;
+
+
+  bool is_ec() const {
+    return !!stripe_info;
+  }
+
+  size_t logical_to_ondisk_size(size_t size) const {
+    return stripe_info ? stripe_info->logical_to_next_chunk_offset(size) : size;
+  }
+};
+
+using scrub_map_set_t = std::map<pg_shard_t, ScrubMap>;
+
+struct chunk_result_t {
+  /* Scrub interacts with stats in two ways:
+   * 1. scrub accumulates a subset of object_stat_sum_t members to
+   *    to ultimately compare to the object_stat_sum_t value maintained
+   *    by the OSD. These members will be referred to as
+   *    *scrub_checked_stats*.
+   *    See iterate_scrub_checked_stats() for the relevant members.
+   * 2. scrub also updates some members that can't be maintained online
+   *    (like num_omap_*, num_large_omap_objects) or that pertain
+   *    specifically to scrub (like num_shallow_scrub_errors).
+   *    Let these by referred to as *scrub_maintained_stats*.
+   *    See iterate_scrub_maintained_stats() for the relevant members.
+   *
+   * The following stats member contains both, but the two sets are
+   * disjoint and treated seperately.
+   */
+  object_stat_sum_t stats;
+
+  // detected errors
+  std::vector<inconsistent_snapset_wrapper> snapset_errors;
+  std::vector<inconsistent_obj_wrapper> object_errors;
+
+  bool has_errors() const {
+    return !snapset_errors.empty() || !object_errors.empty();
+  }
+};
+
+/**
+ * validate_chunk
+ *
+ * Compares shard chunks and based on policy and returns a chunk_result_t
+ * containing the results.  See chunk_result_t for details.
+ */
+chunk_result_t validate_chunk(
+  DoutPrefixProvider &dpp,
+  const chunk_validation_policy_t &policy, const scrub_map_set_t &in);
+
+/**
+ * iterate_scrub_checked_stats
+ *
+ * For each scrub_checked_stat member of object_stat_sum_t, invokes
+ * op with three arguments:
+ * - name of member (string_view)
+ * - pointer to member (T object_stat_sum_t::*)
+ * - function to corresponding pg_stat_t invalid member
+ *   (bool func(const pg_stat_t &))
+ *
+ * Should be used to perform operations on all scrub_checked_stat members
+ * such as checking the accumlated scrub stats against the maintained
+ * pg stats.
+ */
+template <typename Func>
+void foreach_scrub_checked_stat(Func &&op) {
+  using namespace std::string_view_literals;
+  op("num_objects"sv,
+     &object_stat_sum_t::num_objects,
+     [](const pg_stat_t &in) { return false; });
+  op("num_bytes"sv,
+     &object_stat_sum_t::num_bytes,
+     [](const pg_stat_t &in) { return false; });
+  op("num_object_clones"sv,
+     &object_stat_sum_t::num_object_clones,
+     [](const pg_stat_t &in) { return false; });
+  op("num_whiteouts"sv,
+     &object_stat_sum_t::num_whiteouts,
+     [](const pg_stat_t &in) { return false; });
+  op("num_objects_dirty"sv,
+     &object_stat_sum_t::num_objects_dirty,
+     [](const pg_stat_t &in) { return in.dirty_stats_invalid; });
+  op("num_objects_omap"sv,
+     &object_stat_sum_t::num_objects_omap,
+     [](const pg_stat_t &in) { return in.omap_stats_invalid; });
+  op("num_objects_pinned"sv,
+     &object_stat_sum_t::num_objects_pinned,
+     [](const pg_stat_t &in) { return in.pin_stats_invalid; });
+  op("num_objects_hit_set_archive"sv,
+     &object_stat_sum_t::num_objects_hit_set_archive,
+     [](const pg_stat_t &in) { return in.hitset_stats_invalid; });
+  op("num_bytes_hit_set_archive"sv,
+     &object_stat_sum_t::num_bytes_hit_set_archive,
+     [](const pg_stat_t &in) { return in.hitset_bytes_stats_invalid; });
+  op("num_objects_manifest"sv,
+     &object_stat_sum_t::num_objects_manifest,
+     [](const pg_stat_t &in) { return in.manifest_stats_invalid; });
+}
+
+/**
+ * iterate_scrub_maintained_stats
+ *
+ * For each scrub_maintained_stat member of object_stat_sum_t, invokes
+ * op with three arguments:
+ * - name of member (string_view)
+ * - pointer to member (T object_stat_sum_t::*)
+ * - skip for shallow (bool)
+ *
+ * Should be used to perform operations on all scrub_maintained_stat members
+ * such as updating the pg maintained instance once scrub is complete.
+ */
+template <typename Func>
+void foreach_scrub_maintained_stat(Func &&op) {
+  using namespace std::string_view_literals;
+  op("num_scrub_errors"sv, &object_stat_sum_t::num_scrub_errors, false);
+  op("num_shallow_scrub_errors"sv,
+     &object_stat_sum_t::num_shallow_scrub_errors,
+     false);
+  op("num_deep_scrub_errors"sv, &object_stat_sum_t::num_deep_scrub_errors, true);
+  op("num_omap_bytes"sv, &object_stat_sum_t::num_omap_bytes, true);
+  op("num_omap_keys"sv, &object_stat_sum_t::num_omap_keys, true);
+  op("num_large_omap_objects"sv,
+     &object_stat_sum_t::num_large_omap_objects,
+     true);
+}
+
+}
+
+template <>
+struct fmt::formatter<crimson::osd::scrub::chunk_result_t> {
+  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+
+  template <typename FormatContext>
+  auto format(
+    const crimson::osd::scrub::chunk_result_t &result, FormatContext& ctx) const
+  {
+    return fmt::format_to(
+      ctx.out(),
+      "chunk_result_t("
+      "num_scrub_errors: {}, "
+      "num_deep_scrub_errors: {}, "
+      "snapset_errors: [{}], "
+      "object_errors: [{}])",
+      result.stats.num_scrub_errors,
+      result.stats.num_deep_scrub_errors,
+      result.snapset_errors,
+      result.object_errors
+    );
+  }
+};
index b1851cca2c7eea0a2e1283e626c4fc209ade3730..c943ff885464a0b12e3e9c6c080570a6983d55b7 100644 (file)
@@ -103,3 +103,13 @@ target_link_libraries(
   crimson::gtest)
 add_ceph_unittest(unittest-seastar-errorator
   --memory 256M --smp 1)
+
+add_executable(unittest-crimson-scrub
+  test_crimson_scrub.cc
+  ${PROJECT_SOURCE_DIR}/src/crimson/osd/scrub/scrub_machine.cc
+  ${PROJECT_SOURCE_DIR}/src/crimson/osd/scrub/scrub_validator.cc
+  ${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc)
+target_link_libraries(
+  unittest-crimson-scrub
+  crimson-common
+  crimson::gtest)
diff --git a/src/test/crimson/test_crimson_scrub.cc b/src/test/crimson/test_crimson_scrub.cc
new file mode 100644 (file)
index 0000000..65b1f31
--- /dev/null
@@ -0,0 +1,1347 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/iterator/transform_iterator.hpp>
+
+#include <fmt/ranges.h>
+
+#include <seastar/core/sleep.hh>
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "include/rados/rados_types.hpp"
+#include "common/scrub_types.h"
+#include "crimson/common/interruptible_future.h"
+#include "crimson/osd/scrub/scrub_machine.h"
+#include "crimson/osd/scrub/scrub_validator.h"
+
+#include "osd/osd_types_fmt.h"
+
+constexpr static size_t TEST_MAX_OBJECT_SIZE = 128<<20;
+constexpr static std::string_view TEST_INTERNAL_NAMESPACE = ".internal";
+constexpr static uint64_t TEST_OMAP_KEY_LIMIT = 200000;
+constexpr static size_t TEST_OMAP_BYTES_LIMIT = 1<<30;
+
+void so_set_attr_len(ScrubMap::object &obj, const std::string &name, size_t len)
+{
+  obj.attrs[name] = buffer::ptr(len);
+}
+
+void so_set_attr(ScrubMap::object &obj, const std::string &name, bufferlist bl)
+{
+  bl.rebuild();
+  obj.attrs[name] = bl.front();
+}
+
+std::optional<bufferlist> so_get_attr(
+  ScrubMap::object &obj, const std::string &name)
+{
+  if (obj.attrs.count(name)) {
+    bufferlist bl;
+    bl.push_back(obj.attrs[name]);
+    return bl;
+  } else {
+    return std::nullopt;
+  }
+}
+
+template <typename T>
+void so_set_attr_type(
+  ScrubMap::object &obj, const std::string &name,
+  const std::optional<T> &v)
+{
+  if (v) {
+    bufferlist bl;
+    encode(*v, bl, CEPH_FEATURES_ALL);
+    so_set_attr(obj, name, std::move(bl));
+  } else {
+    obj.attrs.erase(name);
+  }
+}
+
+template <typename T>
+std::optional<T> so_get_attr_type(ScrubMap::object &obj, const std::string &name)
+{
+  auto maybe_bl = so_get_attr(obj, name);
+  if (!maybe_bl) {
+    return std::nullopt;
+  }
+  auto bl = std::move(*maybe_bl);
+  try {
+    T ret;
+    auto bliter = bl.cbegin();
+    decode(ret, bliter);
+    return ret;
+  } catch (...) {
+    return std::nullopt;
+  }
+}
+
+void so_set_oi(ScrubMap::object &obj, const std::optional<object_info_t> &oi)
+{
+  return so_set_attr_type<object_info_t>(obj, OI_ATTR, oi);
+}
+
+std::optional<object_info_t> so_get_oi(ScrubMap::object &obj)
+{
+  return so_get_attr_type<object_info_t>(obj, OI_ATTR);
+}
+
+template <typename F>
+void so_mut_oi(ScrubMap::object &obj, F &&f) {
+  so_set_oi(obj, std::invoke(std::forward<F>(f), so_get_oi(obj)));
+}
+
+void so_set_ss(ScrubMap::object &obj, const std::optional<SnapSet> &ss)
+{
+  return so_set_attr_type<SnapSet>(obj, SS_ATTR, ss);
+}
+
+std::optional<SnapSet> so_get_ss(ScrubMap::object &obj)
+{
+  return so_get_attr_type<SnapSet>(obj, SS_ATTR);
+}
+
+template <typename F>
+void so_mut_ss(ScrubMap::object &obj, F &&f) {
+  so_set_ss(obj, std::invoke(std::forward<F>(f), so_get_ss(obj)));
+}
+
+void so_set_hinfo(
+  ScrubMap::object &obj, const std::optional<ECUtil::HashInfo> &hinfo)
+{
+  return so_set_attr_type<ECUtil::HashInfo>(obj, ECUtil::get_hinfo_key(), hinfo);
+}
+
+std::optional<ECUtil::HashInfo> so_get_hinfo(ScrubMap::object &obj)
+{
+  return so_get_attr_type<ECUtil::HashInfo>(obj, ECUtil::get_hinfo_key());
+}
+
+template <typename F>
+void so_mut_hinfo(ScrubMap::object &obj, F &&f) {
+  auto maybe_hinfo = so_get_hinfo(obj);
+  auto new_maybe_hinfo = std::invoke(std::forward<F>(f), std::move(maybe_hinfo));
+  so_set_hinfo(obj, new_maybe_hinfo);
+}
+
+/**
+ * so_builder_t
+ *
+ * Utility class for constructing test objects.
+ */
+struct so_builder_t {
+  ScrubMap::object so;
+
+  void set_defaults() {
+    so.size = 0;
+    so_mut_oi(so, [](auto maybe_oi) {
+      if (maybe_oi) {
+       maybe_oi->size = 0;
+      }
+      return maybe_oi;
+    });
+  }
+
+  static hobject_t make_hoid(std::string name, snapid_t cloneid=CEPH_NOSNAP) {
+    auto oid = object_t(name);
+    return hobject_t{
+      oid,
+      "",
+      cloneid,
+      static_cast<uint32_t>(std::hash<object_t>()(oid)),
+      1,
+      ""
+    };
+  }
+
+  static so_builder_t make_head(std::string name) {
+    auto hoid = make_hoid(name);
+    so_builder_t ret;
+    so_set_oi(ret.so, object_info_t{hoid});
+    so_set_ss(ret.so, SnapSet{});
+    ret.set_defaults();
+    return ret;
+  }
+
+  static so_builder_t make_clone(
+    std::string name,
+    snapid_t cloneid = 4
+  ) {
+    auto hoid = make_hoid(name, cloneid);
+    so_builder_t ret;
+    so_set_oi(ret.so, object_info_t{hoid});
+    ret.set_defaults();
+    return ret;
+  }
+
+  static so_builder_t make_ec_head(std::string name) {
+    auto ret = make_head(name);
+    so_set_hinfo(ret.so, ECUtil::HashInfo{});
+    return ret;
+  }
+
+  static so_builder_t make_ec_clone(
+    std::string name,
+    snapid_t cloneid = 4
+  ) {
+    auto ret = make_clone(name, cloneid);
+    so_set_hinfo(ret.so, ECUtil::HashInfo{});
+    return ret;
+  }
+
+  so_builder_t &set_size(
+    size_t size,
+    const std::optional<ECUtil::stripe_info_t> stripe_info = std::nullopt) {
+    if (stripe_info) {
+      so.size = stripe_info->logical_to_next_chunk_offset(size);
+    } else {
+      so.size = size;
+    }
+
+    so_mut_oi(so, [size](auto maybe_oi) {
+      if (maybe_oi) {
+       maybe_oi->size = size;
+      }
+      return maybe_oi;
+    });
+    so_mut_hinfo(so, [size, &stripe_info](auto maybe_hinfo) {
+      if (maybe_hinfo) {
+       ceph_assert(stripe_info);
+       maybe_hinfo->set_total_chunk_size_clear_hash(
+         stripe_info->logical_to_next_chunk_offset(size));
+      }
+      return maybe_hinfo;
+    });
+    return *this;
+  }
+
+  so_builder_t &add_attr(const std::string &name, size_t len) {
+    so_set_attr_len(so, name, len);
+    return *this;
+  }
+
+  ScrubMap::object get() const {
+    return so;
+  }
+};
+
+/**
+ * test_obj_t
+ *
+ * test param combining an so_builder_t with human readable description with
+ * a stripe_info.
+ */
+struct test_obj_t : so_builder_t {
+  std::optional<ECUtil::stripe_info_t> stripe_info;
+  std::string desc;
+  hobject_t hoid;
+
+  test_obj_t(
+    so_builder_t _builder,
+    std::optional<ECUtil::stripe_info_t> _stripe_info,
+    std::string _desc,
+    hobject_t _hoid) :
+    so_builder_t(std::move(_builder)),
+    stripe_info(std::move(_stripe_info)),
+    desc(std::move(_desc)),
+    hoid(std::move(_hoid)) {
+    ceph_assert(!desc.empty());
+  }
+
+  static test_obj_t make(
+    const std::string &desc,
+    std::optional<ECUtil::stripe_info_t> stripe_info,
+    so_builder_t builder) {
+    hobject_t hoid = so_get_oi(builder.so)->soid;
+    return test_obj_t{
+      std::move(builder),
+      stripe_info,
+      desc,
+      std::move(hoid)};
+  }
+
+  template <typename... Args>
+  static test_obj_t make_head(const std::string &desc, Args&&... args) {
+    return make(
+      desc,
+      std::nullopt,
+      so_builder_t::make_head(std::forward<Args>(args)...));
+  }
+
+  template <typename... Args>
+  static test_obj_t make_clone(const std::string &desc, Args&&... args) {
+    return make(
+      desc,
+      std::nullopt,
+      so_builder_t::make_clone(std::forward<Args>(args)...));
+  }
+
+  template <typename... Args>
+  static test_obj_t make_ec_head(const std::string &desc, Args&&... args) {
+    return make(
+      desc,
+      ECUtil::stripe_info_t{4, 1<<20},
+      so_builder_t::make_ec_head(std::forward<Args>(args)...));
+  }
+
+  template <typename... Args>
+  static test_obj_t make_ec_clone(const std::string &desc, Args&&... args) {
+    return make(
+      desc,
+      ECUtil::stripe_info_t{4, 1<<20},
+      so_builder_t::make_ec_clone(std::forward<Args>(args)...));
+  }
+
+  test_obj_t &set_size(
+    size_t size) {
+    so_builder_t::set_size(size, stripe_info);
+    return *this;
+  }
+
+  test_obj_t &add_attr(const std::string &name, size_t len) {
+    so_builder_t::add_attr(name, len);
+    return *this;
+  }
+
+  ScrubMap::object get() const {
+    return so_builder_t::get();
+  }
+};
+
+/**
+ * Interface for a test case on a single object.
+ */
+struct SingleErrorTestCase {
+  /// Describes limitations on test preconditions
+  enum class restriction_t {
+    NONE,         /// No limitations
+    REPLICA_ONLY, /// Only works if injected on replica
+    EC_ONLY,      /// Only valid for ec objects
+    HEAD_ONLY     /// Only valid for head objects
+  };
+
+  /// returns human-readable string describing the test for debugging
+  virtual std::string_view get_description() const = 0;
+
+  /// returns test_obj_t with error injected
+  virtual test_obj_t adjust_base_object(test_obj_t ret) const {
+    return ret;
+  }
+
+  /// returns test_obj_t with error injected
+  virtual test_obj_t inject_error(test_obj_t) const = 0;
+
+  /// returns expected shard error
+  virtual librados::err_t get_shard_error_sig() const = 0;
+
+  /// returns expected object error
+  virtual librados::obj_err_t get_object_error_sig() const = 0;
+
+  /// returns true if test should be run with passed restriction
+  virtual bool valid_for_restriction(restriction_t restriction) const = 0;
+
+  virtual ~SingleErrorTestCase() = default;
+};
+
+/// Utility template for implementing SimpleErrorTestCase
+template <typename T>
+struct SingleErrorTestCaseT : SingleErrorTestCase {
+  /// Defaults for REQUIRE_EC and REQUIRES_HEAD
+  constexpr static bool REQUIRES_EC = false;
+  constexpr static bool REQUIRES_HEAD = false;
+
+  /* Every implementor must define:
+  constexpr static librados::err_t shard_error_sig{
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+  };
+  */
+
+  librados::err_t get_shard_error_sig() const final {
+    return T::shard_error_sig;
+  }
+  librados::obj_err_t get_object_error_sig() const final {
+    return T::object_error_sig;
+  }
+
+  constexpr static bool requires_ec() {
+    return T::REQUIRES_EC;
+  }
+  constexpr static bool requires_head() {
+    return T::REQUIRES_HEAD;
+  }
+  constexpr static bool requires_replica() {
+    /* If there are no shard_errors, we'll take primary to be authoritative. */
+    return T::shard_error_sig.errors == 0;
+  }
+
+  bool valid_for_restriction(restriction_t restriction) const final {
+    // There aren't currently any tests with two restrictions, if this
+    // changes, the suite instantiations will need to change as well.
+    static_assert(
+      (requires_ec() + requires_head() + requires_replica()) <= 1);
+    return [] {
+      if constexpr (requires_replica()) {
+       return restriction_t::REPLICA_ONLY;
+      } else if constexpr (requires_head()) {
+       return restriction_t::HEAD_ONLY;
+      } else if constexpr (requires_ec()) {
+       return restriction_t::EC_ONLY;
+      } else {
+       return restriction_t::NONE;
+      }
+    }() == restriction;
+  }
+  virtual ~SingleErrorTestCaseT() = default;
+};
+
+/* The following classes exercise each possible error code detected
+ * by evaluate_object_shard and compare_candidate_to_authoritative
+ * in crimson/osd/scrub/scrub_validator.*
+ *
+ * Note, any newly added cases must also be added to the test_cases
+ * array below.
+ */
+
+struct ECHashMismatch : SingleErrorTestCaseT<ECHashMismatch> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::SHARD_EC_HASH_MISMATCH
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+  };
+
+  std::string_view get_description() const {
+    return "ECHashMismatch";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    obj.so.ec_hash_mismatch = true;
+    return obj;
+  }
+};
+
+struct ECSizeMismatch : SingleErrorTestCaseT<ECSizeMismatch> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::SHARD_EC_SIZE_MISMATCH
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+  };
+
+  std::string_view get_description() const {
+    return "ECSizeMismatch";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    obj.so.ec_size_mismatch = true;
+    return obj;
+  }
+};
+
+struct ReadError : SingleErrorTestCaseT<ReadError> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::SHARD_READ_ERR
+  };
+  constexpr static librados::obj_err_t object_error_sig{};
+
+  std::string_view get_description() const {
+    return "ReadError";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    obj.so.read_error = true;
+    return obj;
+  }
+};
+
+struct StatError : SingleErrorTestCaseT<StatError> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::SHARD_STAT_ERR
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+  };
+
+  std::string_view get_description() const {
+    return "StatError";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    obj.so.stat_error = true;
+    return obj;
+  }
+};
+
+struct MissingOI : SingleErrorTestCaseT<MissingOI> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::INFO_MISSING
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::OBJECT_INFO_INCONSISTENCY
+  };
+
+  std::string_view get_description() const {
+    return "MissingOI";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    so_mut_oi(obj.so, [](auto) { return std::nullopt; });
+    return obj;
+  }
+};
+
+struct CorruptOI: SingleErrorTestCaseT<CorruptOI> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::INFO_CORRUPTED
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::OBJECT_INFO_INCONSISTENCY
+  };
+
+  std::string_view get_description() const {
+    return "CorruptOI";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    so_set_attr_len(obj.so, OI_ATTR, 10);
+    return obj;
+  }
+};
+
+struct CorruptOndiskSize : SingleErrorTestCaseT<CorruptOndiskSize> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::SIZE_MISMATCH_INFO
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::SIZE_MISMATCH
+  };
+
+  std::string_view get_description() const {
+    return "CorruptOndiskSize";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    obj.so.size += 2;
+    return obj;
+  }
+};
+
+struct MissingSS : SingleErrorTestCaseT<MissingSS> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::SNAPSET_MISSING
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::SNAPSET_INCONSISTENCY
+  };
+  constexpr static bool REQUIRES_HEAD = true;
+
+  std::string_view get_description() const {
+    return "MissingSS";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    ceph_assert(obj.hoid.is_head());
+    so_mut_ss(obj.so, [](auto) { return std::nullopt; });
+    return obj;
+  }
+};
+
+struct CorruptSS : SingleErrorTestCaseT<CorruptSS> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::SNAPSET_CORRUPTED
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::SNAPSET_INCONSISTENCY
+  };
+  constexpr static bool REQUIRES_HEAD = true;
+
+  std::string_view get_description() const {
+    return "CorruptSS";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    ceph_assert(obj.hoid.is_head());
+    so_set_attr_len(obj.so, SS_ATTR, 10);
+    return obj;
+  }
+};
+
+struct MissingHinfo : SingleErrorTestCaseT<MissingHinfo> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::HINFO_MISSING
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::HINFO_INCONSISTENCY
+  };
+  constexpr static bool REQUIRES_EC = true;
+
+  std::string_view get_description() const {
+    return "MissingHinfo";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    ceph_assert(obj.stripe_info);
+    so_mut_hinfo(obj.so, [](auto) { return std::nullopt; });
+    return obj;
+  }
+};
+
+struct CorruptHinfo : SingleErrorTestCaseT<CorruptHinfo> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::HINFO_CORRUPTED
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::HINFO_INCONSISTENCY
+  };
+  constexpr static bool REQUIRES_EC = true;
+
+  std::string_view get_description() const {
+    return "CorruptHinfo";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    ceph_assert(obj.stripe_info);
+    so_set_attr_len(obj.so, ECUtil::get_hinfo_key(), 10);
+    return obj;
+  }
+};
+
+struct DataDigestMismatch : SingleErrorTestCaseT<DataDigestMismatch> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::DATA_DIGEST_MISMATCH_INFO
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::DATA_DIGEST_MISMATCH
+  };
+
+  std::string_view get_description() const {
+    return "DataDigestMismatch";
+  };
+  test_obj_t adjust_base_object(test_obj_t obj) const {
+    so_mut_oi(obj.so, [](auto maybe_oi) {
+      ceph_assert(maybe_oi);
+      maybe_oi->set_data_digest(1);
+      return maybe_oi;
+    });
+    obj.so.digest_present = true;
+    obj.so.digest = 1;
+    return obj;
+  }
+  test_obj_t inject_error(test_obj_t obj) const {
+    ceph_assert(so_get_oi(obj.so)->is_data_digest());
+    obj.so.digest = 2;
+    return obj;
+  }
+};
+
+struct OmapDigestMismatch : SingleErrorTestCaseT<OmapDigestMismatch> {
+  constexpr static librados::err_t shard_error_sig{
+    librados::err_t::OMAP_DIGEST_MISMATCH_INFO
+  };
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::OMAP_DIGEST_MISMATCH
+  };
+
+  std::string_view get_description() const {
+    return "OmapDigestMismatch";
+  };
+  test_obj_t adjust_base_object(test_obj_t obj) const {
+    so_mut_oi(obj.so, [](auto maybe_oi) {
+      ceph_assert(maybe_oi);
+      maybe_oi->set_omap_digest(1);
+      return maybe_oi;
+    });
+    obj.so.omap_digest_present = true;
+    obj.so.omap_digest = 1;
+    return obj;
+  }
+  test_obj_t inject_error(test_obj_t obj) const {
+    ceph_assert(so_get_oi(obj.so)->is_omap_digest());
+    obj.so.omap_digest = 2;
+    return obj;
+  }
+};
+
+struct ExtraAttribute : SingleErrorTestCaseT<ExtraAttribute> {
+  constexpr static librados::err_t shard_error_sig{};
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::ATTR_NAME_MISMATCH
+  };
+
+  std::string_view get_description() const {
+    return "ExtraAttribute";
+  };
+  test_obj_t inject_error(test_obj_t obj) const {
+    so_set_attr_len(obj.so, "attr_added_erroneously", 10);
+    return obj;
+  }
+};
+
+struct MissingAttribute : SingleErrorTestCaseT<MissingAttribute> {
+  constexpr static librados::err_t shard_error_sig{};
+  constexpr static librados::obj_err_t object_error_sig{
+    librados::obj_err_t::ATTR_NAME_MISMATCH
+  };
+
+  std::string_view get_description() const {
+    return "MissingAttribute";
+  };
+  test_obj_t adjust_base_object(test_obj_t obj) const {
+    so_set_attr_len(obj.so, "attr_to_be_missing", 10);
+    return obj;
+  }
+  test_obj_t inject_error(test_obj_t obj) const {
+    obj.so.attrs.erase("attr_to_be_missing");
+    return obj;
+  }
+};
+
+template <>
+struct fmt::formatter<SingleErrorTestCase> {
+  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+
+  template <typename FormatContext>
+  auto format(const auto &test_case, FormatContext& ctx) const
+  {
+    return fmt::format_to(
+      ctx.out(), "{}",
+      test_case.get_description());
+  }
+};
+
+std::unique_ptr<SingleErrorTestCase> test_cases[] = {
+  std::make_unique<ECHashMismatch>(),
+  std::make_unique<ECSizeMismatch>(),
+  std::make_unique<ReadError>(),
+  std::make_unique<StatError>(),
+  std::make_unique<MissingOI>(),
+  std::make_unique<CorruptOI>(),
+  std::make_unique<CorruptOndiskSize>(),
+  std::make_unique<MissingSS>(),
+  std::make_unique<CorruptSS>(),
+  std::make_unique<MissingHinfo>(),
+  std::make_unique<CorruptHinfo>(),
+  std::make_unique<DataDigestMismatch>(),
+  std::make_unique<OmapDigestMismatch>(),
+  std::make_unique<ExtraAttribute>(),
+  std::make_unique<MissingAttribute>()
+};
+const SingleErrorTestCase *to_ptr(
+  const std::unique_ptr<SingleErrorTestCase> &tc) {
+  return tc.get();
+}
+// iterator over the above set as pointers
+using test_case_ptr_iter_t = boost::transform_iterator<
+  std::function<decltype(to_ptr)>, decltype(std::begin(test_cases))>;
+template <SingleErrorTestCase::restriction_t restriction>
+struct test_case_filter_t {
+  bool operator()(const SingleErrorTestCase *tc) const {
+    return tc->valid_for_restriction(restriction);
+  }
+};
+template <SingleErrorTestCase::restriction_t restriction>
+// iterator over the above set filtered by restriction
+using test_case_filter_iter_t = boost::filter_iterator<
+  test_case_filter_t<restriction>,
+  test_case_ptr_iter_t>;
+template <SingleErrorTestCase::restriction_t restriction>
+// begin and end, used below to instantiate test suites
+auto test_cases_begin() {
+  return test_case_filter_iter_t<restriction>(
+    test_case_filter_t<restriction>(),
+    test_case_ptr_iter_t(std::begin(test_cases), to_ptr),
+    test_case_ptr_iter_t(std::end(test_cases), to_ptr));
+}
+template <SingleErrorTestCase::restriction_t restriction>
+auto test_cases_end() {
+  return test_case_filter_iter_t<restriction>(
+    test_case_filter_t<restriction>(),
+    test_case_ptr_iter_t(std::end(test_cases), to_ptr),
+    test_case_ptr_iter_t(std::end(test_cases), to_ptr));
+}
+
+/// tuple defining each generated test case
+using single_error_test_param_t = std::tuple<
+  test_obj_t,                /// initial test object
+  bool,                      /// inject on primary?
+  const SingleErrorTestCase* /// test case
+  >;
+template <>
+struct fmt::formatter<single_error_test_param_t> {
+  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+
+  template <typename FormatContext>
+  auto format(const auto &param, FormatContext& ctx) const
+  {
+    const auto &[obj, is_primary, test_case] = param;
+    return fmt::format_to(
+      ctx.out(), "{}{}{}",
+      obj.desc,
+      is_primary ? "Primary" : "Replica",
+      test_case->get_description());
+  }
+};
+std::ostream &operator<<(std::ostream &out, const single_error_test_param_t &p)
+{
+  return out << fmt::format("{}", p);
+}
+
+class TestSingleError :
+  public testing::TestWithParam<single_error_test_param_t> {
+};
+
+/**
+ * compare_error_signatures
+ *
+ * Generic helper for comparing err_t, obj_err_t, and
+ * inconsistent_snapset_t with descriptive output.
+ */
+auto compare_error_signatures(const auto &lh, const auto &rh)
+{
+  if (lh.errors == rh.errors) {
+    return ::testing::AssertionSuccess() << fmt::format(
+      "Signature match: {}", lh);
+  } else {
+    return ::testing::AssertionFailure() << fmt::format(
+      "Signature mismatch: {} should be {}",
+      lh, rh);
+  }
+}
+
+TEST_P(TestSingleError, SingleError) {
+  const auto &[_obj, is_primary, test_case] = GetParam();
+  auto obj = test_case->adjust_base_object(_obj);
+
+  const pg_shard_t primary(0, shard_id_t::NO_SHARD);
+  const pg_shard_t replica(1, shard_id_t::NO_SHARD);
+  crimson::osd::scrub::chunk_validation_policy_t policy {
+    primary,
+    obj.stripe_info,
+    TEST_MAX_OBJECT_SIZE,
+    std::string{TEST_INTERNAL_NAMESPACE},
+    TEST_OMAP_KEY_LIMIT,
+    TEST_OMAP_BYTES_LIMIT
+  };
+  const pg_shard_t &target = is_primary ? primary : replica;
+  const std::vector<pg_shard_t> shards = {
+    primary, replica
+  };
+
+  auto with_error = test_case->inject_error(obj);
+  crimson::osd::scrub::scrub_map_set_t maps;
+  for (const auto &osd : shards) {
+    if (osd == target) {
+      maps[osd].objects[obj.hoid] = with_error.get();
+    } else {
+      maps[osd].objects[obj.hoid] = obj.get();
+    }
+  }
+
+  DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub");
+  const auto ret = crimson::osd::scrub::validate_chunk(
+    dpp, policy, maps);
+  const auto &object_errors = ret.object_errors;
+
+  ASSERT_EQ(object_errors.size(), 1) << fmt::format(
+    "{}: generated an incorrect number of errors: {}\n",
+    *test_case, object_errors);
+
+  auto &obj_error = object_errors.front();
+
+  EXPECT_EQ(
+    ret.stats.num_shallow_scrub_errors,
+    (obj_error.has_shallow_errors() ||
+     obj_error.union_shards.has_shallow_errors()) +
+    ret.snapset_errors.size());
+  EXPECT_EQ(
+    ret.stats.num_deep_scrub_errors,
+    (obj_error.has_deep_errors() ||
+     obj_error.union_shards.has_deep_errors()));
+
+  EXPECT_TRUE(compare_error_signatures(
+    static_cast<const librados::obj_err_t&>(obj_error),
+    test_case->get_object_error_sig()));
+
+  EXPECT_EQ(obj_error.shards.size(), shards.size());
+  bool found_selected_oi = false;
+  for (const auto &shard : shards) {
+    auto siter = obj_error.shards.find(
+      librados::osd_shard_t(shard.osd, shard.shard)
+    );
+    if (siter == obj_error.shards.end()) {
+      EXPECT_NE(siter, obj_error.shards.end());
+      continue;
+    }
+    if (shard == target) {
+      EXPECT_TRUE(compare_error_signatures(
+       static_cast<const librados::err_t&>(siter->second),
+       test_case->get_shard_error_sig()));
+    } else {
+      EXPECT_FALSE(siter->second.has_errors());
+      if (siter->second.selected_oi) found_selected_oi = true;
+    }
+    if (shard == primary) {
+      EXPECT_TRUE(siter->second.primary);
+    }
+  }
+  EXPECT_TRUE(found_selected_oi);
+}
+
+/* Tests that don't have restrictions */
+INSTANTIATE_TEST_SUITE_P(
+  SingleErrorGeneral,
+  TestSingleError,
+  ::testing::Combine(
+    ::testing::Values(
+      test_obj_t::make_head("Small", "foo").set_size(64),
+      test_obj_t::make_clone("EmptyWithAttr", "foo2").add_attr("extra_attr", 64),
+      test_obj_t::make_head("ReplicatedRBD", "foo2").set_size(4<<20),
+      test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20),
+      test_obj_t::make_ec_clone("LargeECClone", "foo").set_size(16<<20)
+    ),
+    ::testing::Bool(),
+    ::testing::ValuesIn(
+      test_cases_begin<SingleErrorTestCase::restriction_t::NONE>(),
+      test_cases_end<SingleErrorTestCase::restriction_t::NONE>())
+  ),
+  [](const auto &info) {
+    return fmt::format("{}", info.param);
+  }
+);
+
+/* Some tests don't trigger shard errors, so we can't actually tell which
+ * replica is wrong.  Such tests are written for the error to be injected
+ * on the replica. */
+INSTANTIATE_TEST_SUITE_P(
+  SingleErrorPrimaryOnly,
+  TestSingleError,
+  ::testing::Combine(
+    ::testing::Values(
+      test_obj_t::make_head("Small", "foo").set_size(64),
+      test_obj_t::make_clone("EmptyWithAttr", "foo2").add_attr("extra_attr", 64),
+      test_obj_t::make_head("ReplicatedRBD", "foo2").set_size(4<<20),
+      test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20),
+      test_obj_t::make_ec_clone("LargeECClone", "foo").set_size(16<<20)
+    ),
+    ::testing::Values(false), // replica only
+    ::testing::ValuesIn(
+      test_cases_begin<SingleErrorTestCase::restriction_t::REPLICA_ONLY>(),
+      test_cases_end<SingleErrorTestCase::restriction_t::REPLICA_ONLY>())
+  ),
+  [](const auto &info) {
+    return fmt::format("{}", info.param);
+  }
+);
+
+/* Some tests only make sense on ec objects. */
+INSTANTIATE_TEST_SUITE_P(
+  SingleErrorOnly,
+  TestSingleError,
+  ::testing::Combine(
+    ::testing::Values(
+      test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20),
+      test_obj_t::make_ec_clone("LargeECClone", "foo").set_size(16<<20)
+    ),
+    ::testing::Bool(),
+    ::testing::ValuesIn(
+      test_cases_begin<SingleErrorTestCase::restriction_t::EC_ONLY>(),
+      test_cases_end<SingleErrorTestCase::restriction_t::EC_ONLY>())
+  ),
+  [](const auto &info) {
+    return fmt::format("{}", info.param);
+  }
+);
+
+/* Some tests only make sense on head objects. */
+INSTANTIATE_TEST_SUITE_P(
+  SingleErrorHEAD,
+  TestSingleError,
+  ::testing::Combine(
+    ::testing::Values(
+      test_obj_t::make_head("Small", "foo").set_size(64),
+      test_obj_t::make_head("ReplicatedRBD", "foo2").set_size(4<<20),
+      test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20)
+    ),
+    ::testing::Bool(),
+    ::testing::ValuesIn(
+      test_cases_begin<SingleErrorTestCase::restriction_t::HEAD_ONLY>(),
+      test_cases_end<SingleErrorTestCase::restriction_t::HEAD_ONLY>())
+  ),
+  [](const auto &info) {
+    return fmt::format("{}", info.param);
+  }
+);
+
+using test_clone_spec_t = std::pair<
+  snapid_t, // clone id
+  size_t    // clone size
+  >;
+
+/// descending order of clone id
+using test_clone_list_t = std::vector<test_clone_spec_t>;
+
+/**
+ * snapset_test_case_t
+ *
+ * This descriptor can express 3 types of error
+ * - missing clone
+ * - extra clone
+ * - clone size mismatch
+ * in 4 positions using one bit for each pair.
+ */
+class snapset_test_case_t {
+  uint32_t signature;
+
+  snapset_test_case_t(uint32_t signature) : signature(signature) {}
+
+  constexpr static uint32_t POSITION_BITS = 4;
+  constexpr static uint32_t position_mask[] = {
+    0x1, 0x2, 0x4, 0x8
+  };
+  constexpr static unsigned MAX_POS = std::size(position_mask);
+
+  constexpr static uint32_t MIN_VALID = 0;
+  constexpr static uint32_t MAX_VALID = 0xFFF;
+  enum type_t {
+    MISSING = 0,
+    EXTRA,
+    SIZE
+  };
+
+  bool should_inject(type_t type, unsigned position) const {
+    ceph_assert(position < MAX_POS);
+    return (signature >> (type * POSITION_BITS)) & position_mask[position];
+  }
+  static snapset_test_case_t make(type_t type, unsigned position) {
+    ceph_assert(position < std::size(position_mask));
+    return snapset_test_case_t{
+      position_mask[position] << (type * POSITION_BITS)
+    };
+  }
+  static auto generate_single_errors(type_t type) {
+    std::vector<snapset_test_case_t> ret;
+    ret.reserve(std::size(position_mask));
+    for (unsigned i = 0; i < MAX_POS; ++i) {
+      ret.push_back(make(type, i));
+    }
+    return ret;
+  }
+
+public:
+  constexpr static unsigned get_max_pos() { return MAX_POS; }
+
+  bool should_inject_missing(unsigned position) const {
+    return should_inject(MISSING, position);
+  }
+  bool should_inject_extra(unsigned position) const {
+    return should_inject(EXTRA, position);
+  }
+  bool should_inject_size(unsigned position) const {
+    return should_inject(SIZE, position);
+  }
+
+  static auto generate_single_missing_errors() {
+    return generate_single_errors(MISSING);
+  }
+  static auto generate_single_extra_errors() {
+    return generate_single_errors(EXTRA);
+  }
+  static auto generate_single_size_errors() {
+    return generate_single_errors(SIZE);
+  }
+  static auto generate_random_errors(size_t num, int seed = 0) {
+    std::default_random_engine e1(seed);
+    std::uniform_int_distribution<uint32_t> uniform_dist(1, MAX_VALID);
+
+    std::vector<snapset_test_case_t> ret;
+    ret.reserve(num);
+    for (unsigned i = 0; i < num; ++i) {
+      ret.push_back(snapset_test_case_t{uniform_dist(e1)});
+    }
+    return ret;
+  }
+  friend std::ostream &operator<<(std::ostream &out, snapset_test_case_t rhs);
+};
+std::ostream &operator<<(std::ostream &out, snapset_test_case_t rhs) {
+  for (auto &[s, type] :
+        std::vector<std::pair<std::string, snapset_test_case_t::type_t>>(
+          {{"M", snapset_test_case_t::MISSING},
+           {"E", snapset_test_case_t::EXTRA},
+           {"S", snapset_test_case_t::SIZE}})) {
+    out << s;
+    for (unsigned i = 0;
+        i < snapset_test_case_t::MAX_POS; ++i) {
+      if (rhs.should_inject(type, i)) {
+       out << i;
+      }
+    }
+  }
+  return out;
+}
+
+class TestSnapSetCloneError :
+  public testing::TestWithParam<snapset_test_case_t> {
+};
+
+
+SnapSet make_snapset(const test_clone_list_t &clone_list)
+{
+  SnapSet ss;
+  for (const auto &[cloneid, size] : clone_list) {
+    ss.clones.push_back(cloneid);
+    ss.clone_size[cloneid] = size;
+    ss.clone_overlap[cloneid];
+    ss.clone_snaps[cloneid].push_back(cloneid);
+  }
+  return ss;
+}
+
+std::pair<hobject_t, ScrubMap::object> make_clone(
+  std::string name, std::pair<snapid_t, size_t> in)
+{
+  ScrubMap ret;
+  auto [cloneid, size] = in;
+  hobject_t hoid = so_builder_t::make_hoid(name, in.first);
+  auto so = so_builder_t::make_clone(
+    name, cloneid);
+  so.set_size(size);
+  return std::make_pair(hoid, so.get());
+}
+
+TEST_P(TestSnapSetCloneError, CloneError) {
+  const pg_shard_t primary(0, shard_id_t::NO_SHARD);
+  crimson::osd::scrub::chunk_validation_policy_t policy {
+    primary,
+    std::nullopt,
+    TEST_MAX_OBJECT_SIZE,
+    std::string{TEST_INTERNAL_NAMESPACE},
+    TEST_OMAP_KEY_LIMIT,
+    TEST_OMAP_BYTES_LIMIT
+  };
+
+  crimson::osd::scrub::scrub_map_set_t maps;
+  const std::string name = "test_obj";
+  auto &map = maps[primary];
+  inconsistent_snapset_wrapper expected_error;
+
+  test_clone_list_t should_exist = {
+    { 10, 32 }, { 25,  64 }, { 50,  32 }, { 100,  64 }
+  };
+  test_clone_list_t extra = {
+    { 9, 64 }, { 11, 32 }, { 99, 64 }, { 101, 32 }
+  };
+
+  for (unsigned i = 0; i < snapset_test_case_t::get_max_pos(); ++i) {
+    hobject_t hoid = so_builder_t::make_hoid(name, should_exist[i].first);
+    if (!GetParam().should_inject_missing(i)) {
+      auto to_insert = make_clone(name, should_exist[i]);
+      if (GetParam().should_inject_size(i)) {
+       expected_error.set_size_mismatch();
+       to_insert.second = so_builder_t(to_insert.second).set_size(
+         so_get_oi(to_insert.second)->size + 1).get();
+      }
+      map.objects.insert(to_insert);
+    } else {
+      expected_error.set_clone_missing(should_exist[i].first);
+    }
+    if (GetParam().should_inject_extra(i)) {
+      map.objects.insert(make_clone(name, extra[i]));
+      expected_error.set_clone(extra[i].first);
+    }
+  }
+
+  hobject_t hoid = so_builder_t::make_hoid(name);
+  map.objects[hoid] = so_builder_t::make_head(name).get();
+
+  so_set_ss(map.objects[hoid], make_snapset(should_exist));
+
+  DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub");
+  const auto ret = crimson::osd::scrub::validate_chunk(
+    dpp, policy, maps);
+  EXPECT_EQ(ret.object_errors.size(), 0);
+  ASSERT_EQ(ret.snapset_errors.size(), 1) << fmt::format(
+    "Got snapset_errors: {}", ret.snapset_errors);
+
+  EXPECT_TRUE(compare_error_signatures(
+    ret.snapset_errors.front(),
+    expected_error));
+
+}
+
+INSTANTIATE_TEST_SUITE_P(
+  SingleMissing,
+  TestSnapSetCloneError,
+  ::testing::ValuesIn(snapset_test_case_t::generate_single_missing_errors())
+);
+
+INSTANTIATE_TEST_SUITE_P(
+  SingleExtra,
+  TestSnapSetCloneError,
+  ::testing::ValuesIn(snapset_test_case_t::generate_single_extra_errors())
+);
+
+INSTANTIATE_TEST_SUITE_P(
+  SingleSize,
+  TestSnapSetCloneError,
+  ::testing::ValuesIn(snapset_test_case_t::generate_single_size_errors())
+);
+
+INSTANTIATE_TEST_SUITE_P(
+  MultipleRandom,
+  TestSnapSetCloneError,
+  ::testing::ValuesIn(snapset_test_case_t::generate_random_errors(100))
+);
+
+TEST(TestSnapSet, MissingHead) {
+  const pg_shard_t primary(0, shard_id_t::NO_SHARD);
+  crimson::osd::scrub::chunk_validation_policy_t policy {
+    primary,
+    std::nullopt,
+    TEST_MAX_OBJECT_SIZE,
+    std::string{TEST_INTERNAL_NAMESPACE},
+    TEST_OMAP_KEY_LIMIT,
+    TEST_OMAP_BYTES_LIMIT
+  };
+
+  crimson::osd::scrub::scrub_map_set_t maps;
+  inconsistent_snapset_wrapper expected_error;
+
+  test_clone_list_t clones = {
+    { 10, 64 }, { 25, 32 }, { 50, 64 }, { 100, 32 }
+  };
+  for (const auto &desc : test_clone_list_t{clones}) {
+    maps[primary].objects.emplace(make_clone("test_object", desc));
+  }
+  expected_error.set_headless();
+
+
+  DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub");
+  const auto ret = crimson::osd::scrub::validate_chunk(
+    dpp, policy, maps);
+  EXPECT_EQ(ret.object_errors.size(), 0);
+  ASSERT_EQ(ret.snapset_errors.size(), 1) << fmt::format(
+    "Got snapset_errors: {}", ret.snapset_errors);
+
+  EXPECT_TRUE(compare_error_signatures(
+    ret.snapset_errors.front(),
+    expected_error));
+
+}
+
+TEST(TestSnapSet, Stats) {
+  const pg_shard_t primary(0, shard_id_t::NO_SHARD);
+  crimson::osd::scrub::chunk_validation_policy_t policy {
+    primary,
+    std::nullopt,
+    TEST_MAX_OBJECT_SIZE,
+    std::string{TEST_INTERNAL_NAMESPACE},
+    TEST_OMAP_KEY_LIMIT,
+    TEST_OMAP_BYTES_LIMIT
+  };
+
+
+  object_stat_sum_t expected_stats;
+  crimson::osd::scrub::scrub_map_set_t maps;
+  auto &objs = maps[primary].objects;
+
+  unsigned num = 0;
+  auto add_simple_head = [&](size_t size, auto &&f)
+    -> ScrubMap::object & {
+    auto name = fmt::format("obj-{}", ++num);
+    auto hoid = so_builder_t::make_hoid(name);
+    auto obj = so_builder_t::make_head(name).set_size(size).get();
+    so_mut_oi(obj, std::forward<decltype(f)>(f));
+    expected_stats.num_bytes += size;
+    expected_stats.num_objects++;
+    return objs[hoid] = obj;
+  };
+
+  add_simple_head(64, [&expected_stats](auto maybe_oi) {
+    ceph_assert(maybe_oi);
+    maybe_oi->set_flag(object_info_t::FLAG_DIRTY);
+    expected_stats.num_objects_dirty++;
+    return maybe_oi;
+  });
+
+  add_simple_head(128, [&expected_stats](auto maybe_oi) {
+    ceph_assert(maybe_oi);
+    maybe_oi->set_flag(object_info_t::FLAG_MANIFEST);
+    expected_stats.num_objects_manifest++;
+    return maybe_oi;
+  });
+
+  add_simple_head(0, [&expected_stats](auto maybe_oi) {
+    ceph_assert(maybe_oi);
+    maybe_oi->set_flag(object_info_t::FLAG_WHITEOUT);
+    expected_stats.num_whiteouts++;
+    return maybe_oi;
+  });
+
+  {
+    auto &so = add_simple_head(32, [](auto ret) { return ret; });
+    expected_stats.num_omap_keys += (so.object_omap_keys = 10);
+    expected_stats.num_omap_bytes += (so.object_omap_bytes = 100);
+    expected_stats.num_objects_omap++;
+  }
+
+  {
+    auto &so = add_simple_head(64, [](auto ret) { return ret; });
+    expected_stats.num_omap_keys +=
+      (so.object_omap_keys = (TEST_OMAP_KEY_LIMIT + 1));
+    expected_stats.num_omap_bytes +=
+      (so.object_omap_bytes = so.object_omap_keys);
+    expected_stats.num_objects_omap++;
+    expected_stats.num_large_omap_objects++;
+  }
+
+  {
+    auto &so = add_simple_head(64, [](auto ret) { return ret; });
+    expected_stats.num_omap_keys += (so.object_omap_keys = 1);
+    expected_stats.num_omap_bytes +=
+      (so.object_omap_bytes = (TEST_OMAP_BYTES_LIMIT + 1));
+    expected_stats.num_objects_omap++;
+    expected_stats.num_large_omap_objects++;
+  }
+
+  {
+    auto name = fmt::format("obj-{}", ++num);
+
+    std::map<snapid_t, interval_set<uint64_t>> clone_overlap;
+    test_clone_list_t clones;
+    auto add_clone = [&](std::pair<snapid_t, size_t> clone_desc,
+                        interval_set<uint64_t> overlap) -> ScrubMap::object & {
+      auto hoid = so_builder_t::make_hoid(name, clone_desc.first);
+      clones.push_back(clone_desc);
+      auto [_, obj] = make_clone(name, clone_desc);
+      expected_stats.num_object_clones++;
+      expected_stats.num_objects++;
+
+      expected_stats.num_bytes += clone_desc.second - overlap.size();
+      clone_overlap[clone_desc.first] = std::move(overlap);
+
+      return objs[hoid] = obj;
+    };
+
+    auto make_is = [](uint64_t off, uint64_t len) {
+      interval_set<uint64_t> ret;
+      ret.insert(off, len);
+      return ret;
+    };
+
+    add_clone({99, 32}, {});
+    add_clone({100, 64}, make_is(31, 33));
+
+    {
+      auto hoid = so_builder_t::make_hoid(name);
+      size_t size = 64;
+      auto obj = so_builder_t::make_head(name).set_size(size).get();
+      expected_stats.num_bytes += size;
+      expected_stats.num_objects++;
+
+      SnapSet ss = make_snapset(clones);
+      ss.clone_overlap = std::move(clone_overlap);
+      so_mut_ss(obj, [ss=std::move(ss)](auto) mutable {
+       return std::move(ss);
+      });
+
+      objs[hoid] = obj;
+    }
+  }
+
+  DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub");
+  const auto ret = crimson::osd::scrub::validate_chunk(
+    dpp, policy, maps);
+  EXPECT_EQ(ret.object_errors.size(), 0);
+  ASSERT_EQ(ret.snapset_errors.size(), 0) << fmt::format(
+    "Got snapset_errors: {}", ret.snapset_errors);
+
+  EXPECT_EQ(ret.stats, expected_stats);
+}