From dc254da593956eb05f585a7ff30614d3cc694504 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Mon, 1 Aug 2022 10:14:58 +0000 Subject: [PATCH] osd/scrub: verify SnapMapper consistency Whenever the scrubber access the SnapMapper for the snaps of a specific clone, the mapper will now verify that the snaps have the required mapping DB entries (the 'SNA_' keys). Signed-off-by: Ronen Friedman --- src/common/hobject_fmt.h | 1 + src/osd/PGBackend.h | 1 - src/osd/PrimaryLogPG.h | 3 - src/osd/SnapMapper.h | 3 +- src/osd/scrubber/pg_scrubber.cc | 15 ++- src/osd/scrubber/pg_scrubber.h | 12 +-- src/osd/scrubber/scrub_backend.cc | 103 ++++++++++++++----- src/test/test_snap_mapper.cc | 160 ++++++++++++++++++++++++++++++ 8 files changed, 257 insertions(+), 41 deletions(-) diff --git a/src/common/hobject_fmt.h b/src/common/hobject_fmt.h index 9c33f43446a..622611121ae 100644 --- a/src/common/hobject_fmt.h +++ b/src/common/hobject_fmt.h @@ -6,6 +6,7 @@ * \file fmtlib formatters for some hobject.h classes */ #include +#include #include "common/hobject.h" #include "include/object_fmt.h" diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 29e969f1b24..82cb23a95ff 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -297,7 +297,6 @@ typedef std::shared_ptr OSDMapRef; virtual bool check_failsafe_full() = 0; - virtual bool pg_is_repair() = 0; virtual void inc_osd_stat_repaired() = 0; virtual bool pg_is_remote_backfilling() = 0; virtual void pg_add_local_num_bytes(int64_t num_bytes) = 0; diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 20469ce1eb7..46c51ba29fe 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -442,9 +442,6 @@ public: release_object_locks(manager); } - bool pg_is_repair() override { - return is_repair(); - } void inc_osd_stat_repaired() override { osd->inc_osd_stat_repaired(); } diff --git a/src/osd/SnapMapper.h b/src/osd/SnapMapper.h index 36eed57810c..45d03298b2f 100644 --- a/src/osd/SnapMapper.h +++ b/src/osd/SnapMapper.h @@ -106,7 +106,8 @@ public: * particular snap will group under up to 8 prefixes. */ class SnapMapper : public Scrub::SnapMapReaderI { - friend class MapperVerifier; + friend class MapperVerifier; // unit-test support + friend class DirectMapper; // unit-test support public: CephContext* cct; struct object_snaps { diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 580c1e4c848..2a610b30a07 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -1087,7 +1087,7 @@ int PgScrubber::build_replica_map_chunk() auto required_fixes = m_be->replica_clean_meta(replica_scrubmap, m_end.is_max(), m_start, - *this); + get_snap_mapper_accessor()); // actuate snap-mapper changes: apply_snap_mapper_fixes(required_fixes); @@ -1261,14 +1261,19 @@ void PgScrubber::apply_snap_mapper_fixes( for (auto& [fix_op, hoid, snaps, bogus_snaps] : fix_list) { - if (fix_op == snap_mapper_op_t::update) { + if (fix_op != snap_mapper_op_t::add) { // must remove the existing snap-set before inserting the correct one if (auto r = m_pg->snap_mapper.remove_oid(hoid, &t_drv); r < 0) { derr << __func__ << ": remove_oid returned " << cpp_strerror(r) << dendl; - ceph_abort(); + if (fix_op == snap_mapper_op_t::update) { + // for inconsistent snapmapper objects (i.e. for + // snap_mapper_op_t::inconsistent), we don't fret if we can't remove + // the old entries + ceph_abort(); + } } m_osds->clog->error() << fmt::format( @@ -1293,7 +1298,6 @@ void PgScrubber::apply_snap_mapper_fixes( } // now - insert the correct snap-set - m_pg->snap_mapper.add_oid(hoid, snaps, &t_drv); } @@ -1325,7 +1329,8 @@ void PgScrubber::maps_compare_n_cleanup() { m_pg->add_objects_scrubbed_count(m_be->get_primary_scrubmap().objects.size()); - auto required_fixes = m_be->scrub_compare_maps(m_end.is_max(), *this); + auto required_fixes = + m_be->scrub_compare_maps(m_end.is_max(), get_snap_mapper_accessor()); if (!required_fixes.inconsistent_objs.empty()) { if (state_test(PG_STATE_REPAIR)) { dout(10) << __func__ << ": discarding scrub results (repairing)" << dendl; diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 66292c0edff..aa1c341edf4 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -102,10 +102,8 @@ struct BuildMap; * std::vector: no need to pre-reserve. */ class ReplicaReservations { - using OrigSet = decltype(std::declval().get_actingset()); - PG* m_pg; - OrigSet m_acting_set; + std::set m_acting_set; OSDService* m_osds; std::vector m_waited_for_peers; std::vector m_reserved_peers; @@ -273,7 +271,6 @@ ostream& operator<<(ostream& out, const scrub_flags_t& sf); */ class PgScrubber : public ScrubPgIF, public ScrubMachineListener, - public SnapMapperAccessor, public ScrubBeListener { public: explicit PgScrubber(PG* pg); @@ -549,11 +546,10 @@ class PgScrubber : public ScrubPgIF, utime_t scrub_begin_stamp; std::ostream& gen_prefix(std::ostream& out) const final; - // fetching the snap-set for a given object (used by the scrub-backend) - int get_snaps(const hobject_t& hoid, - std::set* snaps_set) const final + /// facilitate scrub-backend access to SnapMapper mappings + Scrub::SnapMapReaderI& get_snap_mapper_accessor() { - return m_pg->snap_mapper.get_snaps(hoid, snaps_set); + return m_pg->snap_mapper; } void log_cluster_warning(const std::string& warning) const final; diff --git a/src/osd/scrubber/scrub_backend.cc b/src/osd/scrubber/scrub_backend.cc index 478eca12380..1f7c734a636 100644 --- a/src/osd/scrubber/scrub_backend.cc +++ b/src/osd/scrubber/scrub_backend.cc @@ -171,7 +171,7 @@ std::vector ScrubBackend::replica_clean_meta( ScrubMap& repl_map, bool max_reached, const hobject_t& start, - SnapMapperAccessor& snaps_getter) + SnapMapReaderI& snaps_getter) { dout(15) << __func__ << ": REPL META # " << m_cleaned_meta_map.objects.size() << " objects" << dendl; @@ -191,7 +191,7 @@ std::vector ScrubBackend::replica_clean_meta( objs_fix_list_t ScrubBackend::scrub_compare_maps( bool max_reached, - SnapMapperAccessor& snaps_getter) + SnapMapReaderI& snaps_getter) { dout(10) << __func__ << " has maps, analyzing" << dendl; ceph_assert(m_scrubber.is_primary()); @@ -1525,9 +1525,9 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map) if (!expected) { // If we couldn't read the head's snapset, just ignore clones if (head && !snapset) { - clog.error() << m_mode_desc << " " << m_pg_id << " TTTTT:" << m_pg_id - << " " << soid - << " : clone ignored due to missing snapset"; + clog.error() << m_mode_desc << " " << m_pg_id + << " " << soid + << " : clone ignored due to missing snapset"; } else { clog.error() << m_mode_desc << " " << m_pg_id << " " << soid << " : is an unexpected clone"; @@ -1745,7 +1745,7 @@ void ScrubBackend::log_missing(int missing, std::vector ScrubBackend::scan_snaps( ScrubMap& smap, - SnapMapperAccessor& snaps_getter) + SnapMapReaderI& snaps_getter) { std::vector out_orders; hobject_t head; @@ -1768,14 +1768,19 @@ std::vector ScrubBackend::scan_snaps( // parse the SnapSet bufferlist bl; if (o.attrs.find(SS_ATTR) == o.attrs.end()) { - continue; + // no snaps for this head + continue; } bl.push_back(o.attrs[SS_ATTR]); auto p = bl.cbegin(); try { - decode(snapset, p); + decode(snapset, p); } catch (...) { - continue; + dout(20) << fmt::format("{}: failed to decode the snapset ({})", + __func__, + hoid) + << dendl; + continue; } head = hoid.get_head(); continue; @@ -1791,6 +1796,8 @@ std::vector ScrubBackend::scan_snaps( continue; } + // the 'hoid' is a clone hoid at this point. The 'snapset' below was taken + // from the corresponding head hoid. auto maybe_fix_order = scan_object_snaps(hoid, snapset, snaps_getter); if (maybe_fix_order) { out_orders.push_back(std::move(*maybe_fix_order)); @@ -1805,9 +1812,11 @@ std::vector ScrubBackend::scan_snaps( std::optional ScrubBackend::scan_object_snaps( const hobject_t& hoid, const SnapSet& snapset, - SnapMapperAccessor& snaps_getter) + SnapMapReaderI& snaps_getter) { - // check and if necessary fix snap_mapper + using result_t = Scrub::SnapMapReaderI::result_t; + dout(15) << fmt::format("{}: obj:{} snapset:{}", __func__, hoid, snapset) + << dendl; auto p = snapset.clone_snaps.find(hoid.snap); if (p == snapset.clone_snaps.end()) { @@ -1817,21 +1826,69 @@ std::optional ScrubBackend::scan_object_snaps( } set obj_snaps{p->second.begin(), p->second.end()}; - set cur_snaps; - int r = snaps_getter.get_snaps(hoid, &cur_snaps); - if (r != 0 && r != -ENOENT) { - derr << __func__ << ": get_snaps returned " << cpp_strerror(r) << dendl; - ceph_abort(); + // clang-format off + + // validate both that the mapper contains the correct snaps for the object + // and that it is internally consistent. + // possible outcomes: + // + // Error scenarios: + // - SnapMapper index of object snaps does not match that stored in head + // object snapset attribute: + // we should delete the snapmapper entry and re-add it. + // - no mapping found for the object's snaps: + // we should add the missing mapper entries. + // - the snapmapper set for this object is internally inconsistent (e.g. + // the OBJ_ entries do not match the SNA_ entries). We remove + // whatever entries are there, and redo the DB content for this object. + // + // And + // There is the "happy path": cur_snaps == obj_snaps. Nothing to do there. + + // clang-format on + + auto cur_snaps = snaps_getter.get_snaps_check_consistency(hoid); + if (!cur_snaps) { + switch (auto e = cur_snaps.error(); e.code) { + case result_t::code_t::backend_error: + derr << __func__ << ": get_snaps returned " + << cpp_strerror(e.backend_error) << " for " << hoid << dendl; + ceph_abort(); + case result_t::code_t::not_found: + dout(10) << __func__ << ": no snaps for " << hoid << ". Adding." + << dendl; + return snap_mapper_fix_t{snap_mapper_op_t::add, hoid, obj_snaps, {}}; + case result_t::code_t::inconsistent: + dout(10) << __func__ << ": inconsistent snapmapper data for " << hoid + << ". Recreating." << dendl; + return snap_mapper_fix_t{ + snap_mapper_op_t::overwrite, hoid, obj_snaps, {}}; + default: + dout(10) << __func__ << ": error (" << cpp_strerror(e.backend_error) + << ") fetching snapmapper data for " << hoid << ". Recreating." + << dendl; + return snap_mapper_fix_t{ + snap_mapper_op_t::overwrite, hoid, obj_snaps, {}}; + } + __builtin_unreachable(); } - if (r == -ENOENT || cur_snaps != obj_snaps) { - // add this object to the list of snapsets that needs fixing. Note - // that we also collect the existing (bogus) list, for logging purposes - snap_mapper_op_t fixing_op = - (r == -ENOENT ? snap_mapper_op_t::add : snap_mapper_op_t::update); - return snap_mapper_fix_t{fixing_op, hoid, obj_snaps, cur_snaps}; + if (*cur_snaps == obj_snaps) { + dout(20) << fmt::format( + "{}: {}: snapset match SnapMapper's ({})", __func__, hoid, + obj_snaps) + << dendl; + return std::nullopt; } - return std::nullopt; + + // add this object to the list of snapsets that needs fixing. Note + // that we also collect the existing (bogus) list, for logging purposes + dout(20) << fmt::format( + "{}: obj {}: was: {} updating to: {}", __func__, hoid, + *cur_snaps, obj_snaps) + << dendl; + return snap_mapper_fix_t{ + snap_mapper_op_t::update, hoid, obj_snaps, *cur_snaps}; } /* diff --git a/src/test/test_snap_mapper.cc b/src/test/test_snap_mapper.cc index e7c2e6636db..3da2ffd4226 100644 --- a/src/test/test_snap_mapper.cc +++ b/src/test/test_snap_mapper.cc @@ -8,6 +8,7 @@ #include "include/buffer.h" #include "common/map_cacher.hpp" +#include "osd/osd_types_fmt.h" #include "osd/SnapMapper.h" #include "common/Cond.h" @@ -777,3 +778,162 @@ TEST_F(SnapMapperTest, LegacyKeyConvertion) { ASSERT_EQ(converted_key, new_key); } +/** + * 'DirectMapper' provides simple, controlled, interface to the underlying + * SnapMapper. + */ +class DirectMapper { +public: + std::unique_ptr driver{make_unique()}; + std::unique_ptr mapper; + uint32_t mask; + uint32_t bits; + ceph::mutex lock = ceph::make_mutex("lock"); + + DirectMapper( + uint32_t mask, + uint32_t bits) + : mapper(new SnapMapper(g_ceph_context, driver.get(), mask, bits, 0, shard_id_t(1))), + mask(mask), bits(bits) {} + + hobject_t random_hobject() { + return hobject_t( + random_string(1+(rand() % 16)), + random_string(1+(rand() % 16)), + snapid_t(rand() % 1000), + (rand() & ((~0)< &snaps) { + std::lock_guard l{lock}; + PausyAsyncMap::Transaction t; + mapper->add_oid(obj, snaps, &t); + driver->submit(&t); + } + + std::pair to_raw( + const std::pair &to_map) { + return mapper->to_raw(to_map); + } + + std::string to_legacy_raw_key( + const std::pair &to_map) { + return mapper->to_legacy_raw_key(to_map); + } + + std::string to_raw_key( + const std::pair &to_map) { + return mapper->to_raw_key(to_map); + } + + void shorten_mapping_key(snapid_t snap, const hobject_t &clone) + { + // calculate the relevant key + std::string k = mapper->to_raw_key(snap, clone); + + // find the value for this key + map kvmap; + auto r = mapper->backend.get_keys(set{k}, &kvmap); + ASSERT_GE(r, 0); + + // replace the key with its shortened version + PausyAsyncMap::Transaction t; + mapper->backend.remove_keys(set{k}, &t); + auto short_k = k.substr(0, 10); + mapper->backend.set_keys(map{{short_k, kvmap[k]}}, &t); + driver->submit(&t); + driver->flush(); + } +}; + +class DirectMapperTest : public ::testing::Test { + public: + // ctor & initialization + DirectMapperTest() = default; + ~DirectMapperTest() = default; + void SetUp() override; + void TearDown() override; + + protected: + std::unique_ptr direct; +}; + +void DirectMapperTest::SetUp() +{ + direct = std::make_unique(0, 0); +} + +void DirectMapperTest::TearDown() +{ + direct->driver->stop(); + direct->mapper.reset(); + direct->driver.reset(); +} + + +TEST_F(DirectMapperTest, BasciObject) +{ + auto obj = direct->random_hobject(); + set snaps{100, 200}; + direct->create_object(obj, snaps); + + // verify that the OBJ_ & SNA_ entries are there + auto osn1 = direct->mapper->get_snaps(obj); + ASSERT_EQ(snaps, osn1); + auto vsn1 = direct->mapper->get_snaps_check_consistency(obj); + ASSERT_EQ(snaps, vsn1); +} + +TEST_F(DirectMapperTest, CorruptedSnaRecord) +{ + object_t base_name{"obj"}; + std::string key{"key"}; + + hobject_t head{base_name, key, CEPH_NOSNAP, 0x17, 0, ""}; + hobject_t cln1{base_name, key, 10, 0x17, 0, ""}; + hobject_t cln2{base_name, key, 20, 0x17, 0, ""}; // the oldest version + set head_snaps{400, 500}; + set cln1_snaps{300}; + set cln2_snaps{100, 200}; + + PausyAsyncMap::Transaction t; + direct->mapper->add_oid(head, head_snaps, &t); + direct->mapper->add_oid(cln1, cln1_snaps, &t); + direct->mapper->add_oid(cln2, cln2_snaps, &t); + direct->driver->submit(&t); + direct->driver->flush(); + + // verify that the OBJ_ & SNA_ entries are there + { + auto osn1 = direct->mapper->get_snaps(cln1); + EXPECT_EQ(cln1_snaps, osn1); + auto osn2 = direct->mapper->get_snaps(cln2); + EXPECT_EQ(cln2_snaps, osn2); + auto osnh = direct->mapper->get_snaps(head); + EXPECT_EQ(head_snaps, osnh); + } + { + auto vsn1 = direct->mapper->get_snaps_check_consistency(cln1); + EXPECT_EQ(cln1_snaps, vsn1); + auto vsn2 = direct->mapper->get_snaps_check_consistency(cln2); + EXPECT_EQ(cln2_snaps, vsn2); + auto vsnh = direct->mapper->get_snaps_check_consistency(head); + EXPECT_EQ(head_snaps, vsnh); + } + + // corrupt the SNA_ entry for cln1 + direct->shorten_mapping_key(300, cln1); + { + auto vsnh = direct->mapper->get_snaps_check_consistency(head); + EXPECT_EQ(head_snaps, vsnh); + auto vsn1 = direct->mapper->get_snaps(cln1); + EXPECT_EQ(cln1_snaps, vsn1); + auto osn1 = direct->mapper->get_snaps_check_consistency(cln1); + EXPECT_NE(cln1_snaps, osn1); + auto vsn2 = direct->mapper->get_snaps_check_consistency(cln2); + EXPECT_EQ(cln2_snaps, vsn2); + } +} + +///\todo test the case of a corrupted OBJ_ entry -- 2.39.5