From: Samuel Just Date: Fri, 11 Aug 2023 21:03:48 +0000 (-0700) Subject: crimson/osd/scrub: add scrub_machine and scrub_validator along with tests X-Git-Tag: v19.3.0~296^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b1455202b177e94bc4dd97104c1d5e351b3aa537;p=ceph.git crimson/osd/scrub: add scrub_machine and scrub_validator along with tests scrub_validator.h/cc contain the logic from translating a set of ScrubMaps into per-chunk stats and errors. scrub_machine.h/cc contain the logic for the overall scrub lifecycle and reservation management. ScrubContext (scrub_machine.h) is the interface through which the above logic performs reservations and scans. Signed-off-by: Samuel Just --- diff --git a/src/crimson/CMakeLists.txt b/src/crimson/CMakeLists.txt index 9e751fcebc9..510ffbd9df9 100644 --- a/src/crimson/CMakeLists.txt +++ b/src/crimson/CMakeLists.txt @@ -121,6 +121,7 @@ add_library(crimson-common STATIC ${PROJECT_SOURCE_DIR}/src/osd/HitSet.cc ${PROJECT_SOURCE_DIR}/src/osd/OSDMap.cc ${PROJECT_SOURCE_DIR}/src/osd/PGPeeringEvent.cc + ${PROJECT_SOURCE_DIR}/src/common/scrub_types.cc ${PROJECT_SOURCE_DIR}/src/xxHash/xxhash.c ${crimson_common_srcs} $ diff --git a/src/crimson/osd/CMakeLists.txt b/src/crimson/osd/CMakeLists.txt index 817027c18ff..e3ab3cf4d73 100644 --- a/src/crimson/osd/CMakeLists.txt +++ b/src/crimson/osd/CMakeLists.txt @@ -33,6 +33,8 @@ add_executable(crimson-osd replicated_recovery_backend.cc scheduler/scheduler.cc scheduler/mclock_scheduler.cc + scrub/scrub_machine.cc + scrub/scrub_validator.cc osdmap_gate.cc pg_activation_blocker.cc pg_map.cc @@ -40,6 +42,7 @@ add_executable(crimson-osd objclass.cc ${PROJECT_SOURCE_DIR}/src/objclass/class_api.cc ${PROJECT_SOURCE_DIR}/src/osd/ClassHandler.cc + ${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc ${PROJECT_SOURCE_DIR}/src/osd/osd_op_util.cc ${PROJECT_SOURCE_DIR}/src/osd/OSDCap.cc ${PROJECT_SOURCE_DIR}/src/osd/PeeringState.cc diff --git a/src/crimson/osd/scrub/scrub_machine.cc b/src/crimson/osd/scrub/scrub_machine.cc new file mode 100644 index 00000000000..7d674feffeb --- /dev/null +++ b/src/crimson/osd/scrub/scrub_machine.cc @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/ceph_assert.h" + +#include "crimson/osd/scrub/scrub_machine.h" + +namespace crimson::osd::scrub { + +WaitUpdate::WaitUpdate(my_context ctx) : ScrubState(ctx) +{ + auto &cs = context(); + cs.range_reserved = true; + assert(cs.range); + get_scrub_context().reserve_range(cs.range->start, cs.range->end); +} + +ScanRange::ScanRange(my_context ctx) : ScrubState(ctx) +{ + ceph_assert(context().range); + const auto &cs = context(); + const auto &range = cs.range.value(); + get_scrub_context( + ).foreach_id_to_scrub([this, &range, &cs](const auto &id) { + get_scrub_context().scan_range( + id, cs.version, + context().deep, + range.start, range.end); + waiting_on++; + }); +} + +sc::result ScanRange::react(const ScrubContext::scan_range_complete_t &event) +{ + auto [_, inserted] = maps.insert(event.value.to_pair()); + ceph_assert(inserted); + ceph_assert(waiting_on > 0); + --waiting_on; + + if (waiting_on > 0) { + return discard_event(); + } else { + ceph_assert(context().range); + { + auto results = validate_chunk( + get_scrub_context().get_dpp(), + context().policy, + maps); + context().stats.add(results.stats); + get_scrub_context().emit_chunk_result( + *(context().range), + std::move(results)); + } + if (context().range->end.is_max()) { + get_scrub_context().emit_scrub_result( + context().deep, + context().stats); + return transit(); + } else { + context().advance_current( + context().range->end); + return transit(); + } + } +} + +ReplicaScanChunk::ReplicaScanChunk(my_context ctx) : ScrubState(ctx) +{ + auto &to_scan = context().to_scan; + get_scrub_context().generate_and_submit_chunk_result( + to_scan.start, + to_scan.end, + to_scan.deep); +} + +}; diff --git a/src/crimson/osd/scrub/scrub_machine.h b/src/crimson/osd/scrub/scrub_machine.h new file mode 100644 index 00000000000..d2d127adc0d --- /dev/null +++ b/src/crimson/osd/scrub/scrub_machine.h @@ -0,0 +1,613 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/fmt_common.h" +#include "common/hobject.h" +#include "common/hobject_fmt.h" +#include "crimson/common/log.h" +#include "osd/osd_types_fmt.h" +#include "scrub_validator.h" + +namespace crimson::osd::scrub { + +/* Development Notes + * + * Notes: + * - We're leaving out all of the throttle waits. We actually want to handle + * that using crimson's operation throttler machinery. + * + * TODOs: + * - Leaving SnapMapper validation to later work + * - Note, each replica should validate and repair locally as the SnapMapper + * is meant to be a local index of the local object contents + * - Leaving preemption for later + * - Leaving scheduling for later, for now the only way to trigger a scrub + * is via the ceph tell [deep_]scrub command + */ + +namespace sc = boost::statechart; + +template +struct simple_event_t : sc::event { + template + auto fmt_print_ctx(FormatContext & ctx) const { + return fmt::format_to(ctx.out(), "{}", T::event_name); + } +}; + +template +struct value_event_t : sc::event { + const V value; + + template + value_event_t(Args&&... args) : value(std::forward(args)...) {} + + value_event_t(const value_event_t &) = default; + value_event_t(value_event_t &&) = default; + value_event_t &operator=(const value_event_t&) = default; + value_event_t &operator=(value_event_t&&) = default; + + template + auto fmt_print_ctx(FormatContext & ctx) const { + return fmt::format_to(ctx.out(), "{}", T::event_name); + } +}; + + +#define SIMPLE_EVENT(T) struct T : simple_event_t { \ + static constexpr const char * event_name = #T; \ + }; + +#define VALUE_EVENT(T, V) struct T : value_event_t { \ + static constexpr const char * event_name = #T; \ + \ + template \ + T(Args&&... args) : value_event_t( \ + std::forward(args)...) {} \ + }; + +/** + * ScrubContext + * + * Interface to external PG/OSD/IO machinery. + * + * Methods which may take time return immediately and define an event which + * will be asynchronously delivered to the state machine with the result. This + * is a bit clumsy to use, but should render this component highly testable. + * + * Events sent as a completion to a ScrubContext interface method are defined + * within ScrubContext. Other events are defined within ScrubMachine. + */ +struct ScrubContext { + /// return ids to scrub + virtual const std::set &get_ids_to_scrub() const = 0; + + /// iterates over each pg_shard_t to scrub + template + void foreach_id_to_scrub(F &&f) { + for (const auto &id : get_ids_to_scrub()) { + std::invoke(f, id); + } + } + + /// return struct defining chunk validation rules + virtual chunk_validation_policy_t get_policy() const = 0; + + /// notifies implementation of scrub start + virtual void notify_scrub_start(bool deep) = 0; + + /// notifies implementation of scrub end + virtual void notify_scrub_end(bool deep) = 0; + + /// requests range to scrub starting at start + struct request_range_result_t { + hobject_t start; + hobject_t end; + + request_range_result_t( + const hobject_t &start, + const hobject_t &end) : start(start), end(end) {} + + auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) { + return fmt::format_to(ctx.out(), "start: {}, end: {}", start, end); + } + }; + VALUE_EVENT(request_range_complete_t, request_range_result_t); + virtual void request_range( + const hobject_t &start) = 0; + + /// reserves range [start, end) + VALUE_EVENT(reserve_range_complete_t, eversion_t); + virtual void reserve_range( + const hobject_t &start, + const hobject_t &end) = 0; + + /// waits until implementation has committed up to version + SIMPLE_EVENT(await_update_complete_t); + virtual bool await_update( + const eversion_t &version) = 0; + + /// cancel in progress or currently reserved range + virtual void release_range() = 0; + + /// scans [begin, end) on target as of version + struct scan_range_value_t { + pg_shard_t from; + ScrubMap map; + + template + scan_range_value_t( + pg_shard_t from, + Map &&map) : from(from), map(std::forward(map)) {} + + auto to_pair() const { return std::make_pair(from, map); } + auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) { + return fmt::format_to(ctx.out(), "from: {}", from); + } + }; + VALUE_EVENT(scan_range_complete_t, scan_range_value_t); + virtual void scan_range( + pg_shard_t target, + eversion_t version, + bool deep, + const hobject_t &start, + const hobject_t &end) = 0; + + /// instructs implmentatino to scan [begin, end) and emit result to primary + SIMPLE_EVENT(generate_and_submit_chunk_result_complete_t); + virtual void generate_and_submit_chunk_result( + const hobject_t &begin, + const hobject_t &end, + bool deep) = 0; + + /// notifies implementation of chunk scrub results + virtual void emit_chunk_result( + const request_range_result_t &range, + chunk_result_t &&result) = 0; + + /// notifies implementation of full scrub results + virtual void emit_scrub_result( + bool deep, + object_stat_sum_t scrub_stats) = 0; + + /// get dpp instance for logging + virtual DoutPrefixProvider &get_dpp() = 0; +}; + +struct Crash; +struct Inactive; + +namespace events { +/// reset ScrubMachine +SIMPLE_EVENT(reset_t); + +/// start (deep) scrub +struct start_scrub_event_t { + bool deep = false; + + start_scrub_event_t(bool deep) : deep(deep) {} + + auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) { + return fmt::format_to(ctx.out(), "deep: {}", deep); + } +}; +VALUE_EVENT(start_scrub_t, start_scrub_event_t); + +/// notifies ScrubMachine about a write on oid resulting in delta_stats +struct op_stat_event_t { + hobject_t oid; + object_stat_sum_t delta_stats; + + op_stat_event_t( + hobject_t oid, + object_stat_sum_t delta_stats) : oid(oid), delta_stats(delta_stats) {} + + auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) { + return fmt::format_to(ctx.out(), "oid: {}", oid); + } +}; +VALUE_EVENT(op_stats_t, op_stat_event_t); + +/// Prepares statemachine for primary events +SIMPLE_EVENT(primary_activate_t); + +/// Prepares statemachine for replica events +SIMPLE_EVENT(replica_activate_t); + +/// Instructs replica to (deep) scrub [start, end) as of version version +struct replica_scan_event_t { + hobject_t start; + hobject_t end; + eversion_t version; + bool deep = false; + + replica_scan_event_t() = default; + + replica_scan_event_t( + hobject_t start, + hobject_t end, + eversion_t version, + bool deep) : start(start), end(end), version(version), deep(deep) {} + + auto fmt_print_ctx(auto &ctx) const -> decltype(ctx.out()) { + return fmt::format_to( + ctx.out(), "start: {}, end: {}, version: {}, deep: {}", + start, end, version, deep); + } +}; +VALUE_EVENT(replica_scan_t, replica_scan_event_t); + +} + + +/** + * ScrubMachine + * + * Manages orchestration of rados's distributed scrub process. + * + * There are two general ways in which ScrubMachine may need to release + * resources: + * - interval_change_t -- represents case where PG as a whole undergoes + * a distributed mapping change. Distributed resources are released + * implicitly as remote PG instances receive the new map. Local + * resources are still released by ScrubMachine via ScrubContext methods + * generally via state destructors + * - otherwise, ScrubMachine is responsible for notifying remote PG + * instances via the appropriate ScrubContext methods again generally + * from state destructors. + * + * TODO: interval_change_t will be added with remote reservations. + */ +class ScrubMachine + : public sc::state_machine { +public: + static constexpr std::string_view full_name = "ScrubMachine"; + + ScrubContext &context; + ScrubMachine(ScrubContext &context) : context(context) {} +}; + +/** + * ScrubState + * + * Template defining machinery/state common to all scrub state machine + * states. + */ +template +struct ScrubState : sc::state { + using sc_base = sc::state; + DoutPrefixProvider &dpp; + + /* machinery for populating a full_name member for each ScrubState with + * ScrubMachine/.../ParentState/ChildState full_name */ + template + struct concat; + + template + struct concat, CN, std::index_sequence> { + static constexpr size_t value_size = PN.size() + CN.size() + 1; + static constexpr const char value[value_size]{PN[PI]..., '/', CN[CI]...}; + }; + + template + struct join { + using conc = concat< + PN, std::make_index_sequence, + CN, std::make_index_sequence>; + static constexpr std::string_view value{ + conc::value, + conc::value_size + }; + }; + + /// Populated with ScrubMachine/.../Parent/Child for each state Child + static constexpr std::string_view full_name = + join::value; + + template + explicit ScrubState(C ctx) : sc_base(ctx), dpp(get_scrub_context().get_dpp()) { + LOG_PREFIX(ScrubState::ScrubState); + SUBDEBUGDPP(osd, "entering state {}", dpp, full_name); + } + + ~ScrubState() { + LOG_PREFIX(ScrubState::~ScrubState); + SUBDEBUGDPP(osd, "exiting state {}", dpp, full_name); + } + + auto &get_scrub_context() { + return sc_base::template context().context; + } +}; + +struct Crash : ScrubState { + static constexpr std::string_view state_name = "Crash"; + explicit Crash(my_context ctx) : ScrubState(ctx) { + ceph_abort("Crash state impossible"); + } + +}; + +struct PrimaryActive; +struct ReplicaActive; +struct Inactive : ScrubState { + static constexpr std::string_view state_name = "Inactive"; + explicit Inactive(my_context ctx) : ScrubState(ctx) {} + + using reactions = boost::mpl::list< + sc::transition, + sc::transition, + sc::custom_reaction, + sc::custom_reaction, + sc::custom_reaction, + sc::transition< boost::statechart::event_base, Crash > + >; + + sc::result react(const events::reset_t &) { + return discard_event(); + } + sc::result react(const events::start_scrub_t &) { + return discard_event(); + } + sc::result react(const events::op_stats_t &) { + return discard_event(); + } +}; + +struct AwaitScrub; +struct PrimaryActive : ScrubState { + static constexpr std::string_view state_name = "PrimaryActive"; + explicit PrimaryActive(my_context ctx) : ScrubState(ctx) {} + + bool local_reservation_held = false; + std::set remote_reservations_held; + + using reactions = boost::mpl::list< + sc::transition, + sc::custom_reaction, + sc::custom_reaction, + sc::transition< boost::statechart::event_base, Crash > + >; + + sc::result react(const events::start_scrub_t &event) { + return discard_event(); + } + + sc::result react(const events::op_stats_t &) { + return discard_event(); + } +}; + +namespace internal_events { +VALUE_EVENT(set_deep_t, bool); +} + +struct Scrubbing; +struct AwaitScrub : ScrubState { + static constexpr std::string_view state_name = "AwaitScrub"; + explicit AwaitScrub(my_context ctx) : ScrubState(ctx) {} + + using reactions = boost::mpl::list< + sc::custom_reaction + >; + + sc::result react(const events::start_scrub_t &event) { + post_event(internal_events::set_deep_t{event.value.deep}); + return transit(); + } +}; + +struct ChunkState; +struct Scrubbing : ScrubState { + static constexpr std::string_view state_name = "Scrubbing"; + explicit Scrubbing(my_context ctx) + : ScrubState(ctx), policy(get_scrub_context().get_policy()) {} + + + using reactions = boost::mpl::list< + sc::custom_reaction, + sc::custom_reaction + >; + + chunk_validation_policy_t policy; + + /// hobjects < current have been scrubbed + hobject_t current; + + /// true for deep scrub + bool deep = false; + + /// stats for objects < current, maintained via events::op_stats_t + object_stat_sum_t stats; + + void advance_current(const hobject_t &next) { + current = next; + } + + sc::result react(const internal_events::set_deep_t &event) { + deep = event.value; + get_scrub_context().notify_scrub_start(deep); + return discard_event(); + } + + void exit() { + get_scrub_context().notify_scrub_end(deep); + } + + sc::result react(const events::op_stats_t &event) { + if (event.value.oid < current) { + stats.add(event.value.delta_stats); + } + return discard_event(); + } +}; + +struct GetRange; +struct ChunkState : ScrubState { + static constexpr std::string_view state_name = "ChunkState"; + explicit ChunkState(my_context ctx) : ScrubState(ctx) {} + + /// Current chunk includes objects in [range_start, range_end) + boost::optional range; + + /// true once we have requested that the range be reserved + bool range_reserved = false; + + /// version of last update for the reserved chunk + eversion_t version; + + void exit() { + if (range_reserved) { + get_scrub_context().release_range(); + } + } +}; + +struct WaitUpdate; +struct GetRange : ScrubState { + static constexpr std::string_view state_name = "GetRange"; + explicit GetRange(my_context ctx) : ScrubState(ctx) { + get_scrub_context().request_range(context().current); + } + + using reactions = boost::mpl::list< + sc::custom_reaction + >; + + sc::result react(const ScrubContext::request_range_complete_t &event) { + context().range = event.value; + return transit(); + } +}; + +struct ScanRange; +struct WaitUpdate : ScrubState { + static constexpr std::string_view state_name = "WaitUpdate"; + explicit WaitUpdate(my_context ctx); + + using reactions = boost::mpl::list< + sc::custom_reaction + >; + + sc::result react(const ScrubContext::reserve_range_complete_t &e) { + context().version = e.value; + return transit(); + } +}; + +struct ScanRange : ScrubState { + static constexpr std::string_view state_name = "ScanRange"; + explicit ScanRange(my_context ctx); + + scrub_map_set_t maps; + unsigned waiting_on = 0; + + using reactions = boost::mpl::list< + sc::custom_reaction + >; + + sc::result react(const ScrubContext::scan_range_complete_t &); +}; + +struct ReplicaIdle; +struct ReplicaActive : + ScrubState { + static constexpr std::string_view state_name = "ReplicaActive"; + explicit ReplicaActive(my_context ctx) : ScrubState(ctx) {} + + using reactions = boost::mpl::list< + sc::transition, + sc::custom_reaction, + sc::custom_reaction, + sc::transition< boost::statechart::event_base, Crash > + >; + + sc::result react(const events::start_scrub_t &) { + return discard_event(); + } + + sc::result react(const events::op_stats_t &) { + return discard_event(); + } +}; + +struct ReplicaChunkState; +struct ReplicaIdle : ScrubState { + static constexpr std::string_view state_name = "ReplicaIdle"; + explicit ReplicaIdle(my_context ctx) : ScrubState(ctx) {} + + using reactions = boost::mpl::list< + sc::custom_reaction + >; + + sc::result react(const events::replica_scan_t &event) { + LOG_PREFIX(ScrubState::ReplicaIdle::react(events::replica_scan_t)); + SUBDEBUGDPP(osd, "event.value: {}", get_scrub_context().get_dpp(), event.value); + post_event(event); + return transit(); + } +}; + +struct ReplicaWaitUpdate; +struct ReplicaChunkState : ScrubState { + static constexpr std::string_view state_name = "ReplicaChunkState"; + explicit ReplicaChunkState(my_context ctx) : ScrubState(ctx) {} + + using reactions = boost::mpl::list< + sc::custom_reaction + >; + + events::replica_scan_event_t to_scan; + + sc::result react(const events::replica_scan_t &event) { + LOG_PREFIX(ScrubState::ReplicaWaitUpdate::react(events::replica_scan_t)); + SUBDEBUGDPP(osd, "event.value: {}", get_scrub_context().get_dpp(), event.value); + to_scan = event.value; + if (get_scrub_context().await_update(event.value.version)) { + post_event(ScrubContext::await_update_complete_t{}); + } + return discard_event(); + } +}; + +struct ReplicaScanChunk; +struct ReplicaWaitUpdate : ScrubState { + static constexpr std::string_view state_name = "ReplicaWaitUpdate"; + explicit ReplicaWaitUpdate(my_context ctx) : ScrubState(ctx) {} + + using reactions = boost::mpl::list< + sc::transition + >; +}; + +struct ReplicaScanChunk : ScrubState { + static constexpr std::string_view state_name = "ReplicaScanChunk"; + explicit ReplicaScanChunk(my_context ctx); + + using reactions = boost::mpl::list< + sc::transition + >; +}; + +#undef SIMPLE_EVENT +#undef VALUE_EVENT + +} diff --git a/src/crimson/osd/scrub/scrub_validator.cc b/src/crimson/osd/scrub/scrub_validator.cc new file mode 100644 index 00000000000..b7dcc46a35e --- /dev/null +++ b/src/crimson/osd/scrub/scrub_validator.cc @@ -0,0 +1,502 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include + +#include "osd/osd_types_fmt.h" + +#include "crimson/common/log.h" +#include "crimson/osd/scrub/scrub_validator.h" +#include "osd/ECUtil.h" + +SET_SUBSYS(osd); + +namespace crimson::osd::scrub { + +using object_set_t = std::set; +object_set_t get_object_set(const scrub_map_set_t &in) +{ + object_set_t ret; + for (const auto& [from, map] : in) { + std::transform(map.objects.begin(), map.objects.end(), + std::inserter(ret, ret.end()), + [](const auto& i) { return i.first; }); + } + return ret; +} + +struct shard_evaluation_t { + pg_shard_t source; + shard_info_wrapper shard_info; + + std::optional object_info; + std::optional snapset; + std::optional hinfo; + + size_t omap_keys{0}; + size_t omap_bytes{0}; + + bool has_errors() const { + return shard_info.has_errors(); + } + + bool is_primary() const { + return shard_info.primary; + } + + std::weak_ordering operator<=>(const shard_evaluation_t &rhs) const { + return std::make_tuple(!has_errors(), is_primary()) <=> + std::make_tuple(!rhs.has_errors(), rhs.is_primary()); + } +}; +shard_evaluation_t evaluate_object_shard( + const chunk_validation_policy_t &policy, + const hobject_t &oid, + pg_shard_t from, + const ScrubMap::object *maybe_obj) +{ + shard_evaluation_t ret; + ret.source = from; + if (from == policy.primary) { + ret.shard_info.primary = true; + } + if (!maybe_obj || maybe_obj->negative) { + // impossible since chunky scrub was introduced + ceph_assert(!maybe_obj->negative); + ret.shard_info.set_missing(); + return ret; + } + + auto &obj = *maybe_obj; + /* We are ignoring ScrubMap::object::large_omap_object*, object_omap_* is all the + * info we need */ + ret.omap_keys = obj.object_omap_keys; + ret.omap_bytes = obj.object_omap_bytes; + + ret.shard_info.set_object(obj); + + if (obj.ec_hash_mismatch) { + ret.shard_info.set_ec_hash_mismatch(); + } + + if (obj.ec_size_mismatch) { + ret.shard_info.set_ec_size_mismatch(); + } + + if (obj.read_error) { + ret.shard_info.set_read_error(); + } + + if (obj.stat_error) { + ret.shard_info.set_stat_error(); + } + + { + auto xiter = obj.attrs.find(OI_ATTR); + if (xiter == obj.attrs.end()) { + ret.shard_info.set_info_missing(); + } else { + bufferlist bl; + bl.push_back(xiter->second); + ret.object_info = object_info_t{}; + try { + auto bliter = bl.cbegin(); + ::decode(*(ret.object_info), bliter); + } catch (...) { + ret.shard_info.set_info_corrupted(); + ret.object_info = std::nullopt; + } + } + } + + ret.shard_info.size = obj.size; + if (ret.object_info && + obj.size != policy.logical_to_ondisk_size(ret.object_info->size)) { + ret.shard_info.set_size_mismatch_info(); + } + + if (oid.is_head()) { + auto xiter = obj.attrs.find(SS_ATTR); + if (xiter == obj.attrs.end()) { + ret.shard_info.set_snapset_missing(); + } else { + bufferlist bl; + bl.push_back(xiter->second); + ret.snapset = SnapSet{}; + try { + auto bliter = bl.cbegin(); + ::decode(*(ret.snapset), bliter); + } catch (...) { + ret.shard_info.set_snapset_corrupted(); + ret.snapset = std::nullopt; + } + } + } + + if (policy.is_ec()) { + auto xiter = obj.attrs.find(ECUtil::get_hinfo_key()); + if (xiter == obj.attrs.end()) { + ret.shard_info.set_hinfo_missing(); + } else { + bufferlist bl; + bl.push_back(xiter->second); + ret.hinfo = ECUtil::HashInfo{}; + try { + auto bliter = bl.cbegin(); + decode(*(ret.hinfo), bliter); + } catch (...) { + ret.shard_info.set_hinfo_corrupted(); + ret.hinfo = std::nullopt; + } + } + } + + if (ret.object_info) { + if (ret.shard_info.data_digest_present && + ret.object_info->is_data_digest() && + (ret.object_info->data_digest != ret.shard_info.data_digest)) { + ret.shard_info.set_data_digest_mismatch_info(); + } + if (ret.shard_info.omap_digest_present && + ret.object_info->is_omap_digest() && + (ret.object_info->omap_digest != ret.shard_info.omap_digest)) { + ret.shard_info.set_omap_digest_mismatch_info(); + } + } + + return ret; +} + +librados::obj_err_t compare_candidate_to_authoritative( + const chunk_validation_policy_t &policy, + const hobject_t &oid, + const shard_evaluation_t &auth, + const shard_evaluation_t &cand) +{ + using namespace librados; + obj_err_t ret; + + if (cand.shard_info.has_shard_missing()) { + return ret; + } + + const auto &auth_si = auth.shard_info; + const auto &cand_si = cand.shard_info; + + if (auth_si.data_digest != cand_si.data_digest) { + ret.errors |= obj_err_t::DATA_DIGEST_MISMATCH; + } + + if (auth_si.omap_digest != cand_si.omap_digest) { + ret.errors |= obj_err_t::OMAP_DIGEST_MISMATCH; + } + + { + auto aiter = auth_si.attrs.find(OI_ATTR); + ceph_assert(aiter != auth_si.attrs.end()); + + auto citer = cand_si.attrs.find(OI_ATTR); + if (citer == cand_si.attrs.end() || + !aiter->second.contents_equal(citer->second)) { + ret.errors |= obj_err_t::OBJECT_INFO_INCONSISTENCY; + } + } + + if (oid.is_head()) { + auto aiter = auth_si.attrs.find(SS_ATTR); + ceph_assert(aiter != auth_si.attrs.end()); + + auto citer = cand_si.attrs.find(SS_ATTR); + if (citer == cand_si.attrs.end() || + !aiter->second.contents_equal(citer->second)) { + ret.errors |= obj_err_t::SNAPSET_INCONSISTENCY; + } + } + + if (policy.is_ec()) { + auto aiter = auth_si.attrs.find(ECUtil::get_hinfo_key()); + ceph_assert(aiter != auth_si.attrs.end()); + + auto citer = cand_si.attrs.find(ECUtil::get_hinfo_key()); + if (citer == cand_si.attrs.end() || + !aiter->second.contents_equal(citer->second)) { + ret.errors |= obj_err_t::HINFO_INCONSISTENCY; + } + } + + if (auth_si.size != cand_si.size) { + ret.errors |= obj_err_t::SIZE_MISMATCH; + } + + auto is_sys_attr = [&policy](const auto &str) { + return str == OI_ATTR || str == SS_ATTR || + (policy.is_ec() && str == ECUtil::get_hinfo_key()); + }; + for (auto aiter = auth_si.attrs.begin(); aiter != auth_si.attrs.end(); ++aiter) { + if (is_sys_attr(aiter->first)) continue; + + auto citer = cand_si.attrs.find(aiter->first); + if (citer == cand_si.attrs.end()) { + ret.errors |= obj_err_t::ATTR_NAME_MISMATCH; + } else if (!aiter->second.contents_equal(citer->second)) { + ret.errors |= obj_err_t::ATTR_VALUE_MISMATCH; + } + } + if (std::any_of( + cand_si.attrs.begin(), cand_si.attrs.end(), + [&is_sys_attr, &auth_si](auto &p) { + return !is_sys_attr(p.first) && + auth_si.attrs.find(p.first) == auth_si.attrs.end(); + })) { + ret.errors |= obj_err_t::ATTR_NAME_MISMATCH; + } + + return ret; +} + +struct object_evaluation_t { + std::optional inconsistency; + std::optional object_info; + std::optional snapset; + + size_t omap_keys{0}; + size_t omap_bytes{0}; +}; +object_evaluation_t evaluate_object( + const chunk_validation_policy_t &policy, + const hobject_t &hoid, + const scrub_map_set_t &maps) +{ + ceph_assert(maps.size() > 0); + using evaluation_vec_t = std::vector; + evaluation_vec_t shards; + std::transform( + maps.begin(), maps.end(), + std::inserter(shards, shards.end()), + [&hoid, &policy](const auto &item) -> evaluation_vec_t::value_type { + const auto &[shard, scrub_map] = item; + auto miter = scrub_map.objects.find(hoid); + auto maybe_shard = miter == scrub_map.objects.end() ? + nullptr : &(miter->second); + return evaluate_object_shard(policy, hoid, shard, maybe_shard); + }); + + std::sort(shards.begin(), shards.end()); + + auto &auth_eval = shards.back(); + + object_evaluation_t ret; + inconsistent_obj_wrapper iow{hoid}; + if (!auth_eval.has_errors()) { + ret.object_info = auth_eval.object_info; + ret.omap_keys = auth_eval.omap_keys; + ret.omap_bytes = auth_eval.omap_bytes; + ret.snapset = auth_eval.snapset; + if (auth_eval.object_info->size > policy.max_object_size) { + iow.set_size_too_large(); + } + auth_eval.shard_info.selected_oi = true; + std::for_each( + shards.begin(), shards.end() - 1, + [&policy, &hoid, &auth_eval, &iow](auto &cand_eval) { + auto err = compare_candidate_to_authoritative( + policy, hoid, auth_eval, cand_eval); + iow.merge(err); + }); + } + + if (iow.errors || + std::any_of(shards.begin(), shards.end(), + [](auto &cand) { return cand.has_errors(); })) { + for (auto &eval : shards) { + iow.shards.emplace( + librados::osd_shard_t{eval.source.osd, eval.source.shard}, + eval.shard_info); + iow.union_shards.errors |= eval.shard_info.errors; + } + if (auth_eval.object_info) { + iow.version = auth_eval.object_info->version.version; + } + ret.inconsistency = iow; + } + return ret; +} + +using clone_meta_list_t = std::list>; +std::optional evaluate_snapset( + DoutPrefixProvider &dpp, + const hobject_t &hoid, + const std::optional &maybe_snapset, + const clone_meta_list_t &clones) +{ + LOG_PREFIX(evaluate_snapset); + /* inconsistent_snapset_t has several error codes that seem to pertain to + * specific objects rather than to the snapset specifically. I'm choosing + * to ignore those for now */ + inconsistent_snapset_wrapper ret{hoid}; + if (!maybe_snapset) { + ret.set_headless(); + return ret; + } + const auto &snapset = *maybe_snapset; + + auto clone_iter = clones.begin(); + for (auto ss_clone_id : snapset.clones) { + for (; clone_iter != clones.end() && + clone_iter->first.snap < ss_clone_id; + ++clone_iter) { + ret.set_clone(clone_iter->first.snap); + } + + if (clone_iter != clones.end() && + clone_iter->first.snap == ss_clone_id) { + auto ss_clone_size_iter = snapset.clone_size.find(ss_clone_id); + if (ss_clone_size_iter == snapset.clone_size.end() || + ss_clone_size_iter->second != clone_iter->second.size) { + ret.set_size_mismatch(); + } + ++clone_iter; + } else { + ret.set_clone_missing(ss_clone_id); + } + } + + for (; clone_iter != clones.end(); ++clone_iter) { + ret.set_clone(clone_iter->first.snap); + } + + if (ret.errors) { + DEBUGDPP( + "snapset {}, clones {}", + dpp, snapset, clones); + return ret; + } else { + return std::nullopt; + } +} + +void add_object_to_stats( + const chunk_validation_policy_t &policy, + const object_evaluation_t &eval, + object_stat_sum_t *out) +{ + auto &ss = eval.snapset; + if (!eval.object_info) { + return; + } + auto &oi = *eval.object_info; + ceph_assert(out); + out->num_objects++; + if (ss) { + out->num_bytes += oi.size; + for (auto clone : ss->clones) { + out->num_bytes += ss->get_clone_bytes(clone); + out->num_object_clones++; + } + if (oi.is_whiteout()) { + out->num_whiteouts++; + } + } + if (oi.is_dirty()) { + out->num_objects_dirty++; + } + if (oi.is_cache_pinned()) { + out->num_objects_pinned++; + } + if (oi.has_manifest()) { + out->num_objects_manifest++; + } + + if (eval.omap_keys > 0) { + out->num_objects_omap++; + } + out->num_omap_keys += eval.omap_keys; + out->num_omap_bytes += eval.omap_bytes; + + if (oi.soid.nspace == policy.hitset_namespace) { + out->num_objects_hit_set_archive++; + out->num_bytes_hit_set_archive += oi.size; + } + + if (eval.omap_keys > policy.omap_key_limit || + eval.omap_bytes > policy.omap_bytes_limit) { + out->num_large_omap_objects++; + } +} + +chunk_result_t validate_chunk( + DoutPrefixProvider &dpp, + const chunk_validation_policy_t &policy, const scrub_map_set_t &in) +{ + chunk_result_t ret; + + const std::set object_set = get_object_set(in); + + std::list> heads; + clone_meta_list_t clones; + for (const auto &oid: object_set) { + object_evaluation_t eval = evaluate_object(policy, oid, in); + add_object_to_stats(policy, eval, &ret.stats); + if (eval.inconsistency) { + ret.object_errors.push_back(*eval.inconsistency); + } + if (oid.is_head()) { + /* We're only going to consider the head object as "existing" if + * evaluate_object was able to find a sensible, authoritative copy + * complete with snapset */ + if (eval.snapset) { + heads.emplace_back(oid, *eval.snapset); + } + } else { + /* We're only going to consider the clone object as "existing" if + * evaluate_object was able to find a sensible, authoritative copy + * complete with an object_info */ + if (eval.object_info) { + clones.emplace_back(oid, *eval.object_info); + } + } + } + + const hobject_t max_oid = hobject_t::get_max(); + while (heads.size() || clones.size()) { + const hobject_t &next_head = heads.size() ? heads.front().first : max_oid; + const hobject_t &next_clone = clones.size() ? clones.front().first : max_oid; + hobject_t head_to_process = std::min(next_head, next_clone).get_head(); + + clone_meta_list_t clones_to_process; + auto clone_iter = clones.begin(); + while (clone_iter != clones.end() && clone_iter->first < head_to_process) + ++clone_iter; + clones_to_process.splice( + clones_to_process.end(), clones, clones.begin(), clone_iter); + + const auto head_meta = [&]() -> std::optional { + if (head_to_process == next_head) { + auto ret = std::move(heads.front().second); + heads.pop_front(); + return ret; + } else { + return std::nullopt; + } + }(); + + if (auto result = evaluate_snapset( + dpp, head_to_process, head_meta, clones_to_process); result) { + ret.snapset_errors.push_back(*result); + } + } + + for (const auto &i: ret.object_errors) { + ret.stats.num_shallow_scrub_errors += + (i.has_shallow_errors() || i.union_shards.has_shallow_errors()); + ret.stats.num_deep_scrub_errors += + (i.has_deep_errors() || i.union_shards.has_deep_errors()); + } + ret.stats.num_shallow_scrub_errors += ret.snapset_errors.size(); + ret.stats.num_scrub_errors = ret.stats.num_shallow_scrub_errors + + ret.stats.num_deep_scrub_errors; + + return ret; +} + +} diff --git a/src/crimson/osd/scrub/scrub_validator.h b/src/crimson/osd/scrub/scrub_validator.h new file mode 100644 index 00000000000..32f5933d0db --- /dev/null +++ b/src/crimson/osd/scrub/scrub_validator.h @@ -0,0 +1,180 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include + +#include "common/config_proxy.h" +#include "common/scrub_types.h" +#include "crimson/common/log.h" +#include "osd/ECUtil.h" +#include "osd/osd_types.h" + +namespace crimson::osd::scrub { + +struct chunk_validation_policy_t { + pg_shard_t primary; + std::optional stripe_info; + + // osd_max_object_size + size_t max_object_size; + + // osd_hit_set_namespace + std::string hitset_namespace; + + // osd_deep_scrub_large_omap_object_key_threshold + // osd_deep_scrub_large_omap_object_value_sum_threshold + uint64_t omap_key_limit; + size_t omap_bytes_limit; + + + bool is_ec() const { + return !!stripe_info; + } + + size_t logical_to_ondisk_size(size_t size) const { + return stripe_info ? stripe_info->logical_to_next_chunk_offset(size) : size; + } +}; + +using scrub_map_set_t = std::map; + +struct chunk_result_t { + /* Scrub interacts with stats in two ways: + * 1. scrub accumulates a subset of object_stat_sum_t members to + * to ultimately compare to the object_stat_sum_t value maintained + * by the OSD. These members will be referred to as + * *scrub_checked_stats*. + * See iterate_scrub_checked_stats() for the relevant members. + * 2. scrub also updates some members that can't be maintained online + * (like num_omap_*, num_large_omap_objects) or that pertain + * specifically to scrub (like num_shallow_scrub_errors). + * Let these by referred to as *scrub_maintained_stats*. + * See iterate_scrub_maintained_stats() for the relevant members. + * + * The following stats member contains both, but the two sets are + * disjoint and treated seperately. + */ + object_stat_sum_t stats; + + // detected errors + std::vector snapset_errors; + std::vector object_errors; + + bool has_errors() const { + return !snapset_errors.empty() || !object_errors.empty(); + } +}; + +/** + * validate_chunk + * + * Compares shard chunks and based on policy and returns a chunk_result_t + * containing the results. See chunk_result_t for details. + */ +chunk_result_t validate_chunk( + DoutPrefixProvider &dpp, + const chunk_validation_policy_t &policy, const scrub_map_set_t &in); + +/** + * iterate_scrub_checked_stats + * + * For each scrub_checked_stat member of object_stat_sum_t, invokes + * op with three arguments: + * - name of member (string_view) + * - pointer to member (T object_stat_sum_t::*) + * - function to corresponding pg_stat_t invalid member + * (bool func(const pg_stat_t &)) + * + * Should be used to perform operations on all scrub_checked_stat members + * such as checking the accumlated scrub stats against the maintained + * pg stats. + */ +template +void foreach_scrub_checked_stat(Func &&op) { + using namespace std::string_view_literals; + op("num_objects"sv, + &object_stat_sum_t::num_objects, + [](const pg_stat_t &in) { return false; }); + op("num_bytes"sv, + &object_stat_sum_t::num_bytes, + [](const pg_stat_t &in) { return false; }); + op("num_object_clones"sv, + &object_stat_sum_t::num_object_clones, + [](const pg_stat_t &in) { return false; }); + op("num_whiteouts"sv, + &object_stat_sum_t::num_whiteouts, + [](const pg_stat_t &in) { return false; }); + op("num_objects_dirty"sv, + &object_stat_sum_t::num_objects_dirty, + [](const pg_stat_t &in) { return in.dirty_stats_invalid; }); + op("num_objects_omap"sv, + &object_stat_sum_t::num_objects_omap, + [](const pg_stat_t &in) { return in.omap_stats_invalid; }); + op("num_objects_pinned"sv, + &object_stat_sum_t::num_objects_pinned, + [](const pg_stat_t &in) { return in.pin_stats_invalid; }); + op("num_objects_hit_set_archive"sv, + &object_stat_sum_t::num_objects_hit_set_archive, + [](const pg_stat_t &in) { return in.hitset_stats_invalid; }); + op("num_bytes_hit_set_archive"sv, + &object_stat_sum_t::num_bytes_hit_set_archive, + [](const pg_stat_t &in) { return in.hitset_bytes_stats_invalid; }); + op("num_objects_manifest"sv, + &object_stat_sum_t::num_objects_manifest, + [](const pg_stat_t &in) { return in.manifest_stats_invalid; }); +} + +/** + * iterate_scrub_maintained_stats + * + * For each scrub_maintained_stat member of object_stat_sum_t, invokes + * op with three arguments: + * - name of member (string_view) + * - pointer to member (T object_stat_sum_t::*) + * - skip for shallow (bool) + * + * Should be used to perform operations on all scrub_maintained_stat members + * such as updating the pg maintained instance once scrub is complete. + */ +template +void foreach_scrub_maintained_stat(Func &&op) { + using namespace std::string_view_literals; + op("num_scrub_errors"sv, &object_stat_sum_t::num_scrub_errors, false); + op("num_shallow_scrub_errors"sv, + &object_stat_sum_t::num_shallow_scrub_errors, + false); + op("num_deep_scrub_errors"sv, &object_stat_sum_t::num_deep_scrub_errors, true); + op("num_omap_bytes"sv, &object_stat_sum_t::num_omap_bytes, true); + op("num_omap_keys"sv, &object_stat_sum_t::num_omap_keys, true); + op("num_large_omap_objects"sv, + &object_stat_sum_t::num_large_omap_objects, + true); +} + +} + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format( + const crimson::osd::scrub::chunk_result_t &result, FormatContext& ctx) const + { + return fmt::format_to( + ctx.out(), + "chunk_result_t(" + "num_scrub_errors: {}, " + "num_deep_scrub_errors: {}, " + "snapset_errors: [{}], " + "object_errors: [{}])", + result.stats.num_scrub_errors, + result.stats.num_deep_scrub_errors, + result.snapset_errors, + result.object_errors + ); + } +}; diff --git a/src/test/crimson/CMakeLists.txt b/src/test/crimson/CMakeLists.txt index b1851cca2c7..c943ff88546 100644 --- a/src/test/crimson/CMakeLists.txt +++ b/src/test/crimson/CMakeLists.txt @@ -103,3 +103,13 @@ target_link_libraries( crimson::gtest) add_ceph_unittest(unittest-seastar-errorator --memory 256M --smp 1) + +add_executable(unittest-crimson-scrub + test_crimson_scrub.cc + ${PROJECT_SOURCE_DIR}/src/crimson/osd/scrub/scrub_machine.cc + ${PROJECT_SOURCE_DIR}/src/crimson/osd/scrub/scrub_validator.cc + ${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc) +target_link_libraries( + unittest-crimson-scrub + crimson-common + crimson::gtest) diff --git a/src/test/crimson/test_crimson_scrub.cc b/src/test/crimson/test_crimson_scrub.cc new file mode 100644 index 00000000000..65b1f315253 --- /dev/null +++ b/src/test/crimson/test_crimson_scrub.cc @@ -0,0 +1,1347 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include + +#include + +#include + +#include "test/crimson/gtest_seastar.h" + +#include "include/rados/rados_types.hpp" +#include "common/scrub_types.h" +#include "crimson/common/interruptible_future.h" +#include "crimson/osd/scrub/scrub_machine.h" +#include "crimson/osd/scrub/scrub_validator.h" + +#include "osd/osd_types_fmt.h" + +constexpr static size_t TEST_MAX_OBJECT_SIZE = 128<<20; +constexpr static std::string_view TEST_INTERNAL_NAMESPACE = ".internal"; +constexpr static uint64_t TEST_OMAP_KEY_LIMIT = 200000; +constexpr static size_t TEST_OMAP_BYTES_LIMIT = 1<<30; + +void so_set_attr_len(ScrubMap::object &obj, const std::string &name, size_t len) +{ + obj.attrs[name] = buffer::ptr(len); +} + +void so_set_attr(ScrubMap::object &obj, const std::string &name, bufferlist bl) +{ + bl.rebuild(); + obj.attrs[name] = bl.front(); +} + +std::optional so_get_attr( + ScrubMap::object &obj, const std::string &name) +{ + if (obj.attrs.count(name)) { + bufferlist bl; + bl.push_back(obj.attrs[name]); + return bl; + } else { + return std::nullopt; + } +} + +template +void so_set_attr_type( + ScrubMap::object &obj, const std::string &name, + const std::optional &v) +{ + if (v) { + bufferlist bl; + encode(*v, bl, CEPH_FEATURES_ALL); + so_set_attr(obj, name, std::move(bl)); + } else { + obj.attrs.erase(name); + } +} + +template +std::optional so_get_attr_type(ScrubMap::object &obj, const std::string &name) +{ + auto maybe_bl = so_get_attr(obj, name); + if (!maybe_bl) { + return std::nullopt; + } + auto bl = std::move(*maybe_bl); + try { + T ret; + auto bliter = bl.cbegin(); + decode(ret, bliter); + return ret; + } catch (...) { + return std::nullopt; + } +} + +void so_set_oi(ScrubMap::object &obj, const std::optional &oi) +{ + return so_set_attr_type(obj, OI_ATTR, oi); +} + +std::optional so_get_oi(ScrubMap::object &obj) +{ + return so_get_attr_type(obj, OI_ATTR); +} + +template +void so_mut_oi(ScrubMap::object &obj, F &&f) { + so_set_oi(obj, std::invoke(std::forward(f), so_get_oi(obj))); +} + +void so_set_ss(ScrubMap::object &obj, const std::optional &ss) +{ + return so_set_attr_type(obj, SS_ATTR, ss); +} + +std::optional so_get_ss(ScrubMap::object &obj) +{ + return so_get_attr_type(obj, SS_ATTR); +} + +template +void so_mut_ss(ScrubMap::object &obj, F &&f) { + so_set_ss(obj, std::invoke(std::forward(f), so_get_ss(obj))); +} + +void so_set_hinfo( + ScrubMap::object &obj, const std::optional &hinfo) +{ + return so_set_attr_type(obj, ECUtil::get_hinfo_key(), hinfo); +} + +std::optional so_get_hinfo(ScrubMap::object &obj) +{ + return so_get_attr_type(obj, ECUtil::get_hinfo_key()); +} + +template +void so_mut_hinfo(ScrubMap::object &obj, F &&f) { + auto maybe_hinfo = so_get_hinfo(obj); + auto new_maybe_hinfo = std::invoke(std::forward(f), std::move(maybe_hinfo)); + so_set_hinfo(obj, new_maybe_hinfo); +} + +/** + * so_builder_t + * + * Utility class for constructing test objects. + */ +struct so_builder_t { + ScrubMap::object so; + + void set_defaults() { + so.size = 0; + so_mut_oi(so, [](auto maybe_oi) { + if (maybe_oi) { + maybe_oi->size = 0; + } + return maybe_oi; + }); + } + + static hobject_t make_hoid(std::string name, snapid_t cloneid=CEPH_NOSNAP) { + auto oid = object_t(name); + return hobject_t{ + oid, + "", + cloneid, + static_cast(std::hash()(oid)), + 1, + "" + }; + } + + static so_builder_t make_head(std::string name) { + auto hoid = make_hoid(name); + so_builder_t ret; + so_set_oi(ret.so, object_info_t{hoid}); + so_set_ss(ret.so, SnapSet{}); + ret.set_defaults(); + return ret; + } + + static so_builder_t make_clone( + std::string name, + snapid_t cloneid = 4 + ) { + auto hoid = make_hoid(name, cloneid); + so_builder_t ret; + so_set_oi(ret.so, object_info_t{hoid}); + ret.set_defaults(); + return ret; + } + + static so_builder_t make_ec_head(std::string name) { + auto ret = make_head(name); + so_set_hinfo(ret.so, ECUtil::HashInfo{}); + return ret; + } + + static so_builder_t make_ec_clone( + std::string name, + snapid_t cloneid = 4 + ) { + auto ret = make_clone(name, cloneid); + so_set_hinfo(ret.so, ECUtil::HashInfo{}); + return ret; + } + + so_builder_t &set_size( + size_t size, + const std::optional stripe_info = std::nullopt) { + if (stripe_info) { + so.size = stripe_info->logical_to_next_chunk_offset(size); + } else { + so.size = size; + } + + so_mut_oi(so, [size](auto maybe_oi) { + if (maybe_oi) { + maybe_oi->size = size; + } + return maybe_oi; + }); + so_mut_hinfo(so, [size, &stripe_info](auto maybe_hinfo) { + if (maybe_hinfo) { + ceph_assert(stripe_info); + maybe_hinfo->set_total_chunk_size_clear_hash( + stripe_info->logical_to_next_chunk_offset(size)); + } + return maybe_hinfo; + }); + return *this; + } + + so_builder_t &add_attr(const std::string &name, size_t len) { + so_set_attr_len(so, name, len); + return *this; + } + + ScrubMap::object get() const { + return so; + } +}; + +/** + * test_obj_t + * + * test param combining an so_builder_t with human readable description with + * a stripe_info. + */ +struct test_obj_t : so_builder_t { + std::optional stripe_info; + std::string desc; + hobject_t hoid; + + test_obj_t( + so_builder_t _builder, + std::optional _stripe_info, + std::string _desc, + hobject_t _hoid) : + so_builder_t(std::move(_builder)), + stripe_info(std::move(_stripe_info)), + desc(std::move(_desc)), + hoid(std::move(_hoid)) { + ceph_assert(!desc.empty()); + } + + static test_obj_t make( + const std::string &desc, + std::optional stripe_info, + so_builder_t builder) { + hobject_t hoid = so_get_oi(builder.so)->soid; + return test_obj_t{ + std::move(builder), + stripe_info, + desc, + std::move(hoid)}; + } + + template + static test_obj_t make_head(const std::string &desc, Args&&... args) { + return make( + desc, + std::nullopt, + so_builder_t::make_head(std::forward(args)...)); + } + + template + static test_obj_t make_clone(const std::string &desc, Args&&... args) { + return make( + desc, + std::nullopt, + so_builder_t::make_clone(std::forward(args)...)); + } + + template + static test_obj_t make_ec_head(const std::string &desc, Args&&... args) { + return make( + desc, + ECUtil::stripe_info_t{4, 1<<20}, + so_builder_t::make_ec_head(std::forward(args)...)); + } + + template + static test_obj_t make_ec_clone(const std::string &desc, Args&&... args) { + return make( + desc, + ECUtil::stripe_info_t{4, 1<<20}, + so_builder_t::make_ec_clone(std::forward(args)...)); + } + + test_obj_t &set_size( + size_t size) { + so_builder_t::set_size(size, stripe_info); + return *this; + } + + test_obj_t &add_attr(const std::string &name, size_t len) { + so_builder_t::add_attr(name, len); + return *this; + } + + ScrubMap::object get() const { + return so_builder_t::get(); + } +}; + +/** + * Interface for a test case on a single object. + */ +struct SingleErrorTestCase { + /// Describes limitations on test preconditions + enum class restriction_t { + NONE, /// No limitations + REPLICA_ONLY, /// Only works if injected on replica + EC_ONLY, /// Only valid for ec objects + HEAD_ONLY /// Only valid for head objects + }; + + /// returns human-readable string describing the test for debugging + virtual std::string_view get_description() const = 0; + + /// returns test_obj_t with error injected + virtual test_obj_t adjust_base_object(test_obj_t ret) const { + return ret; + } + + /// returns test_obj_t with error injected + virtual test_obj_t inject_error(test_obj_t) const = 0; + + /// returns expected shard error + virtual librados::err_t get_shard_error_sig() const = 0; + + /// returns expected object error + virtual librados::obj_err_t get_object_error_sig() const = 0; + + /// returns true if test should be run with passed restriction + virtual bool valid_for_restriction(restriction_t restriction) const = 0; + + virtual ~SingleErrorTestCase() = default; +}; + +/// Utility template for implementing SimpleErrorTestCase +template +struct SingleErrorTestCaseT : SingleErrorTestCase { + /// Defaults for REQUIRE_EC and REQUIRES_HEAD + constexpr static bool REQUIRES_EC = false; + constexpr static bool REQUIRES_HEAD = false; + + /* Every implementor must define: + constexpr static librados::err_t shard_error_sig{ + }; + constexpr static librados::obj_err_t object_error_sig{ + }; + */ + + librados::err_t get_shard_error_sig() const final { + return T::shard_error_sig; + } + librados::obj_err_t get_object_error_sig() const final { + return T::object_error_sig; + } + + constexpr static bool requires_ec() { + return T::REQUIRES_EC; + } + constexpr static bool requires_head() { + return T::REQUIRES_HEAD; + } + constexpr static bool requires_replica() { + /* If there are no shard_errors, we'll take primary to be authoritative. */ + return T::shard_error_sig.errors == 0; + } + + bool valid_for_restriction(restriction_t restriction) const final { + // There aren't currently any tests with two restrictions, if this + // changes, the suite instantiations will need to change as well. + static_assert( + (requires_ec() + requires_head() + requires_replica()) <= 1); + return [] { + if constexpr (requires_replica()) { + return restriction_t::REPLICA_ONLY; + } else if constexpr (requires_head()) { + return restriction_t::HEAD_ONLY; + } else if constexpr (requires_ec()) { + return restriction_t::EC_ONLY; + } else { + return restriction_t::NONE; + } + }() == restriction; + } + virtual ~SingleErrorTestCaseT() = default; +}; + +/* The following classes exercise each possible error code detected + * by evaluate_object_shard and compare_candidate_to_authoritative + * in crimson/osd/scrub/scrub_validator.* + * + * Note, any newly added cases must also be added to the test_cases + * array below. + */ + +struct ECHashMismatch : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::SHARD_EC_HASH_MISMATCH + }; + constexpr static librados::obj_err_t object_error_sig{ + }; + + std::string_view get_description() const { + return "ECHashMismatch"; + }; + test_obj_t inject_error(test_obj_t obj) const { + obj.so.ec_hash_mismatch = true; + return obj; + } +}; + +struct ECSizeMismatch : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::SHARD_EC_SIZE_MISMATCH + }; + constexpr static librados::obj_err_t object_error_sig{ + }; + + std::string_view get_description() const { + return "ECSizeMismatch"; + }; + test_obj_t inject_error(test_obj_t obj) const { + obj.so.ec_size_mismatch = true; + return obj; + } +}; + +struct ReadError : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::SHARD_READ_ERR + }; + constexpr static librados::obj_err_t object_error_sig{}; + + std::string_view get_description() const { + return "ReadError"; + }; + test_obj_t inject_error(test_obj_t obj) const { + obj.so.read_error = true; + return obj; + } +}; + +struct StatError : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::SHARD_STAT_ERR + }; + constexpr static librados::obj_err_t object_error_sig{ + }; + + std::string_view get_description() const { + return "StatError"; + }; + test_obj_t inject_error(test_obj_t obj) const { + obj.so.stat_error = true; + return obj; + } +}; + +struct MissingOI : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::INFO_MISSING + }; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::OBJECT_INFO_INCONSISTENCY + }; + + std::string_view get_description() const { + return "MissingOI"; + }; + test_obj_t inject_error(test_obj_t obj) const { + so_mut_oi(obj.so, [](auto) { return std::nullopt; }); + return obj; + } +}; + +struct CorruptOI: SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::INFO_CORRUPTED + }; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::OBJECT_INFO_INCONSISTENCY + }; + + std::string_view get_description() const { + return "CorruptOI"; + }; + test_obj_t inject_error(test_obj_t obj) const { + so_set_attr_len(obj.so, OI_ATTR, 10); + return obj; + } +}; + +struct CorruptOndiskSize : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::SIZE_MISMATCH_INFO + }; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::SIZE_MISMATCH + }; + + std::string_view get_description() const { + return "CorruptOndiskSize"; + }; + test_obj_t inject_error(test_obj_t obj) const { + obj.so.size += 2; + return obj; + } +}; + +struct MissingSS : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::SNAPSET_MISSING + }; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::SNAPSET_INCONSISTENCY + }; + constexpr static bool REQUIRES_HEAD = true; + + std::string_view get_description() const { + return "MissingSS"; + }; + test_obj_t inject_error(test_obj_t obj) const { + ceph_assert(obj.hoid.is_head()); + so_mut_ss(obj.so, [](auto) { return std::nullopt; }); + return obj; + } +}; + +struct CorruptSS : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::SNAPSET_CORRUPTED + }; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::SNAPSET_INCONSISTENCY + }; + constexpr static bool REQUIRES_HEAD = true; + + std::string_view get_description() const { + return "CorruptSS"; + }; + test_obj_t inject_error(test_obj_t obj) const { + ceph_assert(obj.hoid.is_head()); + so_set_attr_len(obj.so, SS_ATTR, 10); + return obj; + } +}; + +struct MissingHinfo : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::HINFO_MISSING + }; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::HINFO_INCONSISTENCY + }; + constexpr static bool REQUIRES_EC = true; + + std::string_view get_description() const { + return "MissingHinfo"; + }; + test_obj_t inject_error(test_obj_t obj) const { + ceph_assert(obj.stripe_info); + so_mut_hinfo(obj.so, [](auto) { return std::nullopt; }); + return obj; + } +}; + +struct CorruptHinfo : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::HINFO_CORRUPTED + }; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::HINFO_INCONSISTENCY + }; + constexpr static bool REQUIRES_EC = true; + + std::string_view get_description() const { + return "CorruptHinfo"; + }; + test_obj_t inject_error(test_obj_t obj) const { + ceph_assert(obj.stripe_info); + so_set_attr_len(obj.so, ECUtil::get_hinfo_key(), 10); + return obj; + } +}; + +struct DataDigestMismatch : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::DATA_DIGEST_MISMATCH_INFO + }; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::DATA_DIGEST_MISMATCH + }; + + std::string_view get_description() const { + return "DataDigestMismatch"; + }; + test_obj_t adjust_base_object(test_obj_t obj) const { + so_mut_oi(obj.so, [](auto maybe_oi) { + ceph_assert(maybe_oi); + maybe_oi->set_data_digest(1); + return maybe_oi; + }); + obj.so.digest_present = true; + obj.so.digest = 1; + return obj; + } + test_obj_t inject_error(test_obj_t obj) const { + ceph_assert(so_get_oi(obj.so)->is_data_digest()); + obj.so.digest = 2; + return obj; + } +}; + +struct OmapDigestMismatch : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{ + librados::err_t::OMAP_DIGEST_MISMATCH_INFO + }; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::OMAP_DIGEST_MISMATCH + }; + + std::string_view get_description() const { + return "OmapDigestMismatch"; + }; + test_obj_t adjust_base_object(test_obj_t obj) const { + so_mut_oi(obj.so, [](auto maybe_oi) { + ceph_assert(maybe_oi); + maybe_oi->set_omap_digest(1); + return maybe_oi; + }); + obj.so.omap_digest_present = true; + obj.so.omap_digest = 1; + return obj; + } + test_obj_t inject_error(test_obj_t obj) const { + ceph_assert(so_get_oi(obj.so)->is_omap_digest()); + obj.so.omap_digest = 2; + return obj; + } +}; + +struct ExtraAttribute : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{}; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::ATTR_NAME_MISMATCH + }; + + std::string_view get_description() const { + return "ExtraAttribute"; + }; + test_obj_t inject_error(test_obj_t obj) const { + so_set_attr_len(obj.so, "attr_added_erroneously", 10); + return obj; + } +}; + +struct MissingAttribute : SingleErrorTestCaseT { + constexpr static librados::err_t shard_error_sig{}; + constexpr static librados::obj_err_t object_error_sig{ + librados::obj_err_t::ATTR_NAME_MISMATCH + }; + + std::string_view get_description() const { + return "MissingAttribute"; + }; + test_obj_t adjust_base_object(test_obj_t obj) const { + so_set_attr_len(obj.so, "attr_to_be_missing", 10); + return obj; + } + test_obj_t inject_error(test_obj_t obj) const { + obj.so.attrs.erase("attr_to_be_missing"); + return obj; + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const auto &test_case, FormatContext& ctx) const + { + return fmt::format_to( + ctx.out(), "{}", + test_case.get_description()); + } +}; + +std::unique_ptr test_cases[] = { + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique() +}; +const SingleErrorTestCase *to_ptr( + const std::unique_ptr &tc) { + return tc.get(); +} +// iterator over the above set as pointers +using test_case_ptr_iter_t = boost::transform_iterator< + std::function, decltype(std::begin(test_cases))>; +template +struct test_case_filter_t { + bool operator()(const SingleErrorTestCase *tc) const { + return tc->valid_for_restriction(restriction); + } +}; +template +// iterator over the above set filtered by restriction +using test_case_filter_iter_t = boost::filter_iterator< + test_case_filter_t, + test_case_ptr_iter_t>; +template +// begin and end, used below to instantiate test suites +auto test_cases_begin() { + return test_case_filter_iter_t( + test_case_filter_t(), + test_case_ptr_iter_t(std::begin(test_cases), to_ptr), + test_case_ptr_iter_t(std::end(test_cases), to_ptr)); +} +template +auto test_cases_end() { + return test_case_filter_iter_t( + test_case_filter_t(), + test_case_ptr_iter_t(std::end(test_cases), to_ptr), + test_case_ptr_iter_t(std::end(test_cases), to_ptr)); +} + +/// tuple defining each generated test case +using single_error_test_param_t = std::tuple< + test_obj_t, /// initial test object + bool, /// inject on primary? + const SingleErrorTestCase* /// test case + >; +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const auto ¶m, FormatContext& ctx) const + { + const auto &[obj, is_primary, test_case] = param; + return fmt::format_to( + ctx.out(), "{}{}{}", + obj.desc, + is_primary ? "Primary" : "Replica", + test_case->get_description()); + } +}; +std::ostream &operator<<(std::ostream &out, const single_error_test_param_t &p) +{ + return out << fmt::format("{}", p); +} + +class TestSingleError : + public testing::TestWithParam { +}; + +/** + * compare_error_signatures + * + * Generic helper for comparing err_t, obj_err_t, and + * inconsistent_snapset_t with descriptive output. + */ +auto compare_error_signatures(const auto &lh, const auto &rh) +{ + if (lh.errors == rh.errors) { + return ::testing::AssertionSuccess() << fmt::format( + "Signature match: {}", lh); + } else { + return ::testing::AssertionFailure() << fmt::format( + "Signature mismatch: {} should be {}", + lh, rh); + } +} + +TEST_P(TestSingleError, SingleError) { + const auto &[_obj, is_primary, test_case] = GetParam(); + auto obj = test_case->adjust_base_object(_obj); + + const pg_shard_t primary(0, shard_id_t::NO_SHARD); + const pg_shard_t replica(1, shard_id_t::NO_SHARD); + crimson::osd::scrub::chunk_validation_policy_t policy { + primary, + obj.stripe_info, + TEST_MAX_OBJECT_SIZE, + std::string{TEST_INTERNAL_NAMESPACE}, + TEST_OMAP_KEY_LIMIT, + TEST_OMAP_BYTES_LIMIT + }; + const pg_shard_t &target = is_primary ? primary : replica; + const std::vector shards = { + primary, replica + }; + + auto with_error = test_case->inject_error(obj); + crimson::osd::scrub::scrub_map_set_t maps; + for (const auto &osd : shards) { + if (osd == target) { + maps[osd].objects[obj.hoid] = with_error.get(); + } else { + maps[osd].objects[obj.hoid] = obj.get(); + } + } + + DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub"); + const auto ret = crimson::osd::scrub::validate_chunk( + dpp, policy, maps); + const auto &object_errors = ret.object_errors; + + ASSERT_EQ(object_errors.size(), 1) << fmt::format( + "{}: generated an incorrect number of errors: {}\n", + *test_case, object_errors); + + auto &obj_error = object_errors.front(); + + EXPECT_EQ( + ret.stats.num_shallow_scrub_errors, + (obj_error.has_shallow_errors() || + obj_error.union_shards.has_shallow_errors()) + + ret.snapset_errors.size()); + EXPECT_EQ( + ret.stats.num_deep_scrub_errors, + (obj_error.has_deep_errors() || + obj_error.union_shards.has_deep_errors())); + + EXPECT_TRUE(compare_error_signatures( + static_cast(obj_error), + test_case->get_object_error_sig())); + + EXPECT_EQ(obj_error.shards.size(), shards.size()); + bool found_selected_oi = false; + for (const auto &shard : shards) { + auto siter = obj_error.shards.find( + librados::osd_shard_t(shard.osd, shard.shard) + ); + if (siter == obj_error.shards.end()) { + EXPECT_NE(siter, obj_error.shards.end()); + continue; + } + if (shard == target) { + EXPECT_TRUE(compare_error_signatures( + static_cast(siter->second), + test_case->get_shard_error_sig())); + } else { + EXPECT_FALSE(siter->second.has_errors()); + if (siter->second.selected_oi) found_selected_oi = true; + } + if (shard == primary) { + EXPECT_TRUE(siter->second.primary); + } + } + EXPECT_TRUE(found_selected_oi); +} + +/* Tests that don't have restrictions */ +INSTANTIATE_TEST_SUITE_P( + SingleErrorGeneral, + TestSingleError, + ::testing::Combine( + ::testing::Values( + test_obj_t::make_head("Small", "foo").set_size(64), + test_obj_t::make_clone("EmptyWithAttr", "foo2").add_attr("extra_attr", 64), + test_obj_t::make_head("ReplicatedRBD", "foo2").set_size(4<<20), + test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20), + test_obj_t::make_ec_clone("LargeECClone", "foo").set_size(16<<20) + ), + ::testing::Bool(), + ::testing::ValuesIn( + test_cases_begin(), + test_cases_end()) + ), + [](const auto &info) { + return fmt::format("{}", info.param); + } +); + +/* Some tests don't trigger shard errors, so we can't actually tell which + * replica is wrong. Such tests are written for the error to be injected + * on the replica. */ +INSTANTIATE_TEST_SUITE_P( + SingleErrorPrimaryOnly, + TestSingleError, + ::testing::Combine( + ::testing::Values( + test_obj_t::make_head("Small", "foo").set_size(64), + test_obj_t::make_clone("EmptyWithAttr", "foo2").add_attr("extra_attr", 64), + test_obj_t::make_head("ReplicatedRBD", "foo2").set_size(4<<20), + test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20), + test_obj_t::make_ec_clone("LargeECClone", "foo").set_size(16<<20) + ), + ::testing::Values(false), // replica only + ::testing::ValuesIn( + test_cases_begin(), + test_cases_end()) + ), + [](const auto &info) { + return fmt::format("{}", info.param); + } +); + +/* Some tests only make sense on ec objects. */ +INSTANTIATE_TEST_SUITE_P( + SingleErrorOnly, + TestSingleError, + ::testing::Combine( + ::testing::Values( + test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20), + test_obj_t::make_ec_clone("LargeECClone", "foo").set_size(16<<20) + ), + ::testing::Bool(), + ::testing::ValuesIn( + test_cases_begin(), + test_cases_end()) + ), + [](const auto &info) { + return fmt::format("{}", info.param); + } +); + +/* Some tests only make sense on head objects. */ +INSTANTIATE_TEST_SUITE_P( + SingleErrorHEAD, + TestSingleError, + ::testing::Combine( + ::testing::Values( + test_obj_t::make_head("Small", "foo").set_size(64), + test_obj_t::make_head("ReplicatedRBD", "foo2").set_size(4<<20), + test_obj_t::make_ec_head("ECHead", "foo").set_size(4<<20) + ), + ::testing::Bool(), + ::testing::ValuesIn( + test_cases_begin(), + test_cases_end()) + ), + [](const auto &info) { + return fmt::format("{}", info.param); + } +); + +using test_clone_spec_t = std::pair< + snapid_t, // clone id + size_t // clone size + >; + +/// descending order of clone id +using test_clone_list_t = std::vector; + +/** + * snapset_test_case_t + * + * This descriptor can express 3 types of error + * - missing clone + * - extra clone + * - clone size mismatch + * in 4 positions using one bit for each pair. + */ +class snapset_test_case_t { + uint32_t signature; + + snapset_test_case_t(uint32_t signature) : signature(signature) {} + + constexpr static uint32_t POSITION_BITS = 4; + constexpr static uint32_t position_mask[] = { + 0x1, 0x2, 0x4, 0x8 + }; + constexpr static unsigned MAX_POS = std::size(position_mask); + + constexpr static uint32_t MIN_VALID = 0; + constexpr static uint32_t MAX_VALID = 0xFFF; + enum type_t { + MISSING = 0, + EXTRA, + SIZE + }; + + bool should_inject(type_t type, unsigned position) const { + ceph_assert(position < MAX_POS); + return (signature >> (type * POSITION_BITS)) & position_mask[position]; + } + static snapset_test_case_t make(type_t type, unsigned position) { + ceph_assert(position < std::size(position_mask)); + return snapset_test_case_t{ + position_mask[position] << (type * POSITION_BITS) + }; + } + static auto generate_single_errors(type_t type) { + std::vector ret; + ret.reserve(std::size(position_mask)); + for (unsigned i = 0; i < MAX_POS; ++i) { + ret.push_back(make(type, i)); + } + return ret; + } + +public: + constexpr static unsigned get_max_pos() { return MAX_POS; } + + bool should_inject_missing(unsigned position) const { + return should_inject(MISSING, position); + } + bool should_inject_extra(unsigned position) const { + return should_inject(EXTRA, position); + } + bool should_inject_size(unsigned position) const { + return should_inject(SIZE, position); + } + + static auto generate_single_missing_errors() { + return generate_single_errors(MISSING); + } + static auto generate_single_extra_errors() { + return generate_single_errors(EXTRA); + } + static auto generate_single_size_errors() { + return generate_single_errors(SIZE); + } + static auto generate_random_errors(size_t num, int seed = 0) { + std::default_random_engine e1(seed); + std::uniform_int_distribution uniform_dist(1, MAX_VALID); + + std::vector ret; + ret.reserve(num); + for (unsigned i = 0; i < num; ++i) { + ret.push_back(snapset_test_case_t{uniform_dist(e1)}); + } + return ret; + } + friend std::ostream &operator<<(std::ostream &out, snapset_test_case_t rhs); +}; +std::ostream &operator<<(std::ostream &out, snapset_test_case_t rhs) { + for (auto &[s, type] : + std::vector>( + {{"M", snapset_test_case_t::MISSING}, + {"E", snapset_test_case_t::EXTRA}, + {"S", snapset_test_case_t::SIZE}})) { + out << s; + for (unsigned i = 0; + i < snapset_test_case_t::MAX_POS; ++i) { + if (rhs.should_inject(type, i)) { + out << i; + } + } + } + return out; +} + +class TestSnapSetCloneError : + public testing::TestWithParam { +}; + + +SnapSet make_snapset(const test_clone_list_t &clone_list) +{ + SnapSet ss; + for (const auto &[cloneid, size] : clone_list) { + ss.clones.push_back(cloneid); + ss.clone_size[cloneid] = size; + ss.clone_overlap[cloneid]; + ss.clone_snaps[cloneid].push_back(cloneid); + } + return ss; +} + +std::pair make_clone( + std::string name, std::pair in) +{ + ScrubMap ret; + auto [cloneid, size] = in; + hobject_t hoid = so_builder_t::make_hoid(name, in.first); + auto so = so_builder_t::make_clone( + name, cloneid); + so.set_size(size); + return std::make_pair(hoid, so.get()); +} + +TEST_P(TestSnapSetCloneError, CloneError) { + const pg_shard_t primary(0, shard_id_t::NO_SHARD); + crimson::osd::scrub::chunk_validation_policy_t policy { + primary, + std::nullopt, + TEST_MAX_OBJECT_SIZE, + std::string{TEST_INTERNAL_NAMESPACE}, + TEST_OMAP_KEY_LIMIT, + TEST_OMAP_BYTES_LIMIT + }; + + crimson::osd::scrub::scrub_map_set_t maps; + const std::string name = "test_obj"; + auto &map = maps[primary]; + inconsistent_snapset_wrapper expected_error; + + test_clone_list_t should_exist = { + { 10, 32 }, { 25, 64 }, { 50, 32 }, { 100, 64 } + }; + test_clone_list_t extra = { + { 9, 64 }, { 11, 32 }, { 99, 64 }, { 101, 32 } + }; + + for (unsigned i = 0; i < snapset_test_case_t::get_max_pos(); ++i) { + hobject_t hoid = so_builder_t::make_hoid(name, should_exist[i].first); + if (!GetParam().should_inject_missing(i)) { + auto to_insert = make_clone(name, should_exist[i]); + if (GetParam().should_inject_size(i)) { + expected_error.set_size_mismatch(); + to_insert.second = so_builder_t(to_insert.second).set_size( + so_get_oi(to_insert.second)->size + 1).get(); + } + map.objects.insert(to_insert); + } else { + expected_error.set_clone_missing(should_exist[i].first); + } + if (GetParam().should_inject_extra(i)) { + map.objects.insert(make_clone(name, extra[i])); + expected_error.set_clone(extra[i].first); + } + } + + hobject_t hoid = so_builder_t::make_hoid(name); + map.objects[hoid] = so_builder_t::make_head(name).get(); + + so_set_ss(map.objects[hoid], make_snapset(should_exist)); + + DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub"); + const auto ret = crimson::osd::scrub::validate_chunk( + dpp, policy, maps); + EXPECT_EQ(ret.object_errors.size(), 0); + ASSERT_EQ(ret.snapset_errors.size(), 1) << fmt::format( + "Got snapset_errors: {}", ret.snapset_errors); + + EXPECT_TRUE(compare_error_signatures( + ret.snapset_errors.front(), + expected_error)); + +} + +INSTANTIATE_TEST_SUITE_P( + SingleMissing, + TestSnapSetCloneError, + ::testing::ValuesIn(snapset_test_case_t::generate_single_missing_errors()) +); + +INSTANTIATE_TEST_SUITE_P( + SingleExtra, + TestSnapSetCloneError, + ::testing::ValuesIn(snapset_test_case_t::generate_single_extra_errors()) +); + +INSTANTIATE_TEST_SUITE_P( + SingleSize, + TestSnapSetCloneError, + ::testing::ValuesIn(snapset_test_case_t::generate_single_size_errors()) +); + +INSTANTIATE_TEST_SUITE_P( + MultipleRandom, + TestSnapSetCloneError, + ::testing::ValuesIn(snapset_test_case_t::generate_random_errors(100)) +); + +TEST(TestSnapSet, MissingHead) { + const pg_shard_t primary(0, shard_id_t::NO_SHARD); + crimson::osd::scrub::chunk_validation_policy_t policy { + primary, + std::nullopt, + TEST_MAX_OBJECT_SIZE, + std::string{TEST_INTERNAL_NAMESPACE}, + TEST_OMAP_KEY_LIMIT, + TEST_OMAP_BYTES_LIMIT + }; + + crimson::osd::scrub::scrub_map_set_t maps; + inconsistent_snapset_wrapper expected_error; + + test_clone_list_t clones = { + { 10, 64 }, { 25, 32 }, { 50, 64 }, { 100, 32 } + }; + for (const auto &desc : test_clone_list_t{clones}) { + maps[primary].objects.emplace(make_clone("test_object", desc)); + } + expected_error.set_headless(); + + + DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub"); + const auto ret = crimson::osd::scrub::validate_chunk( + dpp, policy, maps); + EXPECT_EQ(ret.object_errors.size(), 0); + ASSERT_EQ(ret.snapset_errors.size(), 1) << fmt::format( + "Got snapset_errors: {}", ret.snapset_errors); + + EXPECT_TRUE(compare_error_signatures( + ret.snapset_errors.front(), + expected_error)); + +} + +TEST(TestSnapSet, Stats) { + const pg_shard_t primary(0, shard_id_t::NO_SHARD); + crimson::osd::scrub::chunk_validation_policy_t policy { + primary, + std::nullopt, + TEST_MAX_OBJECT_SIZE, + std::string{TEST_INTERNAL_NAMESPACE}, + TEST_OMAP_KEY_LIMIT, + TEST_OMAP_BYTES_LIMIT + }; + + + object_stat_sum_t expected_stats; + crimson::osd::scrub::scrub_map_set_t maps; + auto &objs = maps[primary].objects; + + unsigned num = 0; + auto add_simple_head = [&](size_t size, auto &&f) + -> ScrubMap::object & { + auto name = fmt::format("obj-{}", ++num); + auto hoid = so_builder_t::make_hoid(name); + auto obj = so_builder_t::make_head(name).set_size(size).get(); + so_mut_oi(obj, std::forward(f)); + expected_stats.num_bytes += size; + expected_stats.num_objects++; + return objs[hoid] = obj; + }; + + add_simple_head(64, [&expected_stats](auto maybe_oi) { + ceph_assert(maybe_oi); + maybe_oi->set_flag(object_info_t::FLAG_DIRTY); + expected_stats.num_objects_dirty++; + return maybe_oi; + }); + + add_simple_head(128, [&expected_stats](auto maybe_oi) { + ceph_assert(maybe_oi); + maybe_oi->set_flag(object_info_t::FLAG_MANIFEST); + expected_stats.num_objects_manifest++; + return maybe_oi; + }); + + add_simple_head(0, [&expected_stats](auto maybe_oi) { + ceph_assert(maybe_oi); + maybe_oi->set_flag(object_info_t::FLAG_WHITEOUT); + expected_stats.num_whiteouts++; + return maybe_oi; + }); + + { + auto &so = add_simple_head(32, [](auto ret) { return ret; }); + expected_stats.num_omap_keys += (so.object_omap_keys = 10); + expected_stats.num_omap_bytes += (so.object_omap_bytes = 100); + expected_stats.num_objects_omap++; + } + + { + auto &so = add_simple_head(64, [](auto ret) { return ret; }); + expected_stats.num_omap_keys += + (so.object_omap_keys = (TEST_OMAP_KEY_LIMIT + 1)); + expected_stats.num_omap_bytes += + (so.object_omap_bytes = so.object_omap_keys); + expected_stats.num_objects_omap++; + expected_stats.num_large_omap_objects++; + } + + { + auto &so = add_simple_head(64, [](auto ret) { return ret; }); + expected_stats.num_omap_keys += (so.object_omap_keys = 1); + expected_stats.num_omap_bytes += + (so.object_omap_bytes = (TEST_OMAP_BYTES_LIMIT + 1)); + expected_stats.num_objects_omap++; + expected_stats.num_large_omap_objects++; + } + + { + auto name = fmt::format("obj-{}", ++num); + + std::map> clone_overlap; + test_clone_list_t clones; + auto add_clone = [&](std::pair clone_desc, + interval_set overlap) -> ScrubMap::object & { + auto hoid = so_builder_t::make_hoid(name, clone_desc.first); + clones.push_back(clone_desc); + auto [_, obj] = make_clone(name, clone_desc); + expected_stats.num_object_clones++; + expected_stats.num_objects++; + + expected_stats.num_bytes += clone_desc.second - overlap.size(); + clone_overlap[clone_desc.first] = std::move(overlap); + + return objs[hoid] = obj; + }; + + auto make_is = [](uint64_t off, uint64_t len) { + interval_set ret; + ret.insert(off, len); + return ret; + }; + + add_clone({99, 32}, {}); + add_clone({100, 64}, make_is(31, 33)); + + { + auto hoid = so_builder_t::make_hoid(name); + size_t size = 64; + auto obj = so_builder_t::make_head(name).set_size(size).get(); + expected_stats.num_bytes += size; + expected_stats.num_objects++; + + SnapSet ss = make_snapset(clones); + ss.clone_overlap = std::move(clone_overlap); + so_mut_ss(obj, [ss=std::move(ss)](auto) mutable { + return std::move(ss); + }); + + objs[hoid] = obj; + } + } + + DoutPrefix dpp(nullptr, ceph_subsys_test, "test_crimson_scrub"); + const auto ret = crimson::osd::scrub::validate_chunk( + dpp, policy, maps); + EXPECT_EQ(ret.object_errors.size(), 0); + ASSERT_EQ(ret.snapset_errors.size(), 0) << fmt::format( + "Got snapset_errors: {}", ret.snapset_errors); + + EXPECT_EQ(ret.stats, expected_stats); +}