]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: verify SnapMapper consistency
authorRonen Friedman <rfriedma@redhat.com>
Mon, 1 Aug 2022 10:14:58 +0000 (10:14 +0000)
committerRonen Friedman <rfriedma@redhat.com>
Fri, 2 Sep 2022 07:40:54 +0000 (10:40 +0300)
Whenever the scrubber access the SnapMapper for the snaps of a specific
clone, the mapper will now verify that the snaps have the required
mapping DB entries (the 'SNA_' keys).

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/common/hobject_fmt.h
src/osd/PGBackend.h
src/osd/PrimaryLogPG.h
src/osd/SnapMapper.h
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/pg_scrubber.h
src/osd/scrubber/scrub_backend.cc
src/test/test_snap_mapper.cc

index 9c33f43446a8ccf4e96e0dfbe315a555d12a6015..622611121ae6fb680a9d83cd505987dcd37c8ac7 100644 (file)
@@ -6,6 +6,7 @@
  * \file fmtlib formatters for some hobject.h classes
  */
 #include <fmt/format.h>
+#include <fmt/ranges.h>
 
 #include "common/hobject.h"
 #include "include/object_fmt.h"
index 29e969f1b24cec25d0cd006f1846f850c0f15c53..82cb23a95ffa6e47f4ef04f12f310aef4191e6ed 100644 (file)
@@ -297,7 +297,6 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
 
      virtual bool check_failsafe_full() = 0;
 
-     virtual bool pg_is_repair() = 0;
      virtual void inc_osd_stat_repaired() = 0;
      virtual bool pg_is_remote_backfilling() = 0;
      virtual void pg_add_local_num_bytes(int64_t num_bytes) = 0;
index 20469ce1eb76ad0a1607f1be973000f2a0160187..46c51ba29fe0fd0afa8d2fbb5e158238b281ffec 100644 (file)
@@ -442,9 +442,6 @@ public:
     release_object_locks(manager);
   }
 
-  bool pg_is_repair() override {
-    return is_repair();
-  }
   void inc_osd_stat_repaired() override {
     osd->inc_osd_stat_repaired();
   }
index 36eed57810c30584b658882073dbd175416a5706..45d03298b2fb5415f10f3ee62be9b46aff00b487 100644 (file)
@@ -106,7 +106,8 @@ public:
  * particular snap will group under up to 8 prefixes.
  */
 class SnapMapper : public Scrub::SnapMapReaderI {
-  friend class MapperVerifier;
+  friend class MapperVerifier; // unit-test support
+  friend class DirectMapper; // unit-test support
 public:
   CephContext* cct;
   struct object_snaps {
index 580c1e4c8487b390bb2863fb53647677c93ed5a9..2a610b30a077dd0e3268a0fd9f1b3719f9eac4af 100644 (file)
@@ -1087,7 +1087,7 @@ int PgScrubber::build_replica_map_chunk()
       auto required_fixes = m_be->replica_clean_meta(replica_scrubmap,
                                                     m_end.is_max(),
                                                     m_start,
-                                                    *this);
+                                                    get_snap_mapper_accessor());
       // actuate snap-mapper changes:
       apply_snap_mapper_fixes(required_fixes);
 
@@ -1261,14 +1261,19 @@ void PgScrubber::apply_snap_mapper_fixes(
 
   for (auto& [fix_op, hoid, snaps, bogus_snaps] : fix_list) {
 
-    if (fix_op == snap_mapper_op_t::update) {
+    if (fix_op != snap_mapper_op_t::add) {
 
       // must remove the existing snap-set before inserting the correct one
       if (auto r = m_pg->snap_mapper.remove_oid(hoid, &t_drv); r < 0) {
 
        derr << __func__ << ": remove_oid returned " << cpp_strerror(r)
             << dendl;
-       ceph_abort();
+       if (fix_op == snap_mapper_op_t::update) {
+         // for inconsistent snapmapper objects (i.e. for
+         // snap_mapper_op_t::inconsistent), we don't fret if we can't remove
+         // the old entries
+         ceph_abort();
+       }
       }
 
       m_osds->clog->error() << fmt::format(
@@ -1293,7 +1298,6 @@ void PgScrubber::apply_snap_mapper_fixes(
     }
 
     // now - insert the correct snap-set
-
     m_pg->snap_mapper.add_oid(hoid, snaps, &t_drv);
   }
 
@@ -1325,7 +1329,8 @@ void PgScrubber::maps_compare_n_cleanup()
 {
   m_pg->add_objects_scrubbed_count(m_be->get_primary_scrubmap().objects.size());
 
-  auto required_fixes = m_be->scrub_compare_maps(m_end.is_max(), *this);
+  auto required_fixes =
+    m_be->scrub_compare_maps(m_end.is_max(), get_snap_mapper_accessor());
   if (!required_fixes.inconsistent_objs.empty()) {
     if (state_test(PG_STATE_REPAIR)) {
       dout(10) << __func__ << ": discarding scrub results (repairing)" << dendl;
index 66292c0edffd1a99558499be8df99476eb308651..aa1c341edf49d7d7694fe4fe74748f2eafc4bad0 100644 (file)
@@ -102,10 +102,8 @@ struct BuildMap;
  * std::vector: no need to pre-reserve.
  */
 class ReplicaReservations {
-  using OrigSet = decltype(std::declval<PG>().get_actingset());
-
   PG* m_pg;
-  OrigSet m_acting_set;
+  std::set<pg_shard_t> m_acting_set;
   OSDService* m_osds;
   std::vector<pg_shard_t> m_waited_for_peers;
   std::vector<pg_shard_t> m_reserved_peers;
@@ -273,7 +271,6 @@ ostream& operator<<(ostream& out, const scrub_flags_t& sf);
  */
 class PgScrubber : public ScrubPgIF,
                    public ScrubMachineListener,
-                   public SnapMapperAccessor,
                    public ScrubBeListener {
  public:
   explicit PgScrubber(PG* pg);
@@ -549,11 +546,10 @@ class PgScrubber : public ScrubPgIF,
   utime_t scrub_begin_stamp;
   std::ostream& gen_prefix(std::ostream& out) const final;
 
-  //  fetching the snap-set for a given object (used by the scrub-backend)
-  int get_snaps(const hobject_t& hoid,
-               std::set<snapid_t>* snaps_set) const final
+  /// facilitate scrub-backend access to SnapMapper mappings
+  Scrub::SnapMapReaderI& get_snap_mapper_accessor()
   {
-    return m_pg->snap_mapper.get_snaps(hoid, snaps_set);
+    return m_pg->snap_mapper;
   }
 
   void log_cluster_warning(const std::string& warning) const final;
index 478eca12380f9b8efca1a3571a225cc12c66d2f2..1f7c734a6366edd0c53fd779bbe3a8fc0c61a9ea 100644 (file)
@@ -171,7 +171,7 @@ std::vector<snap_mapper_fix_t> ScrubBackend::replica_clean_meta(
   ScrubMap& repl_map,
   bool max_reached,
   const hobject_t& start,
-  SnapMapperAccessor& snaps_getter)
+  SnapMapReaderI& snaps_getter)
 {
   dout(15) << __func__ << ": REPL META # " << m_cleaned_meta_map.objects.size()
            << " objects" << dendl;
@@ -191,7 +191,7 @@ std::vector<snap_mapper_fix_t> ScrubBackend::replica_clean_meta(
 
 objs_fix_list_t ScrubBackend::scrub_compare_maps(
   bool max_reached,
-  SnapMapperAccessor& snaps_getter)
+  SnapMapReaderI& snaps_getter)
 {
   dout(10) << __func__ << " has maps, analyzing" << dendl;
   ceph_assert(m_scrubber.is_primary());
@@ -1525,9 +1525,9 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map)
     if (!expected) {
       // If we couldn't read the head's snapset, just ignore clones
       if (head && !snapset) {
-        clog.error() << m_mode_desc << " " << m_pg_id << " TTTTT:" << m_pg_id
-                      << " " << soid
-                      << " : clone ignored due to missing snapset";
+       clog.error() << m_mode_desc << " " << m_pg_id
+                    << " " << soid
+                    << " : clone ignored due to missing snapset";
       } else {
         clog.error() << m_mode_desc << " " << m_pg_id << " " << soid
                       << " : is an unexpected clone";
@@ -1745,7 +1745,7 @@ void ScrubBackend::log_missing(int missing,
 
 std::vector<snap_mapper_fix_t> ScrubBackend::scan_snaps(
   ScrubMap& smap,
-  SnapMapperAccessor& snaps_getter)
+  SnapMapReaderI& snaps_getter)
 {
   std::vector<snap_mapper_fix_t> out_orders;
   hobject_t head;
@@ -1768,14 +1768,19 @@ std::vector<snap_mapper_fix_t> ScrubBackend::scan_snaps(
       // parse the SnapSet
       bufferlist bl;
       if (o.attrs.find(SS_ATTR) == o.attrs.end()) {
-        continue;
+       // no snaps for this head
+       continue;
       }
       bl.push_back(o.attrs[SS_ATTR]);
       auto p = bl.cbegin();
       try {
-        decode(snapset, p);
+       decode(snapset, p);
       } catch (...) {
-        continue;
+       dout(20) << fmt::format("{}: failed to decode the snapset ({})",
+                               __func__,
+                               hoid)
+                << dendl;
+       continue;
       }
       head = hoid.get_head();
       continue;
@@ -1791,6 +1796,8 @@ std::vector<snap_mapper_fix_t> ScrubBackend::scan_snaps(
         continue;
       }
 
+      // the 'hoid' is a clone hoid at this point. The 'snapset' below was taken
+      // from the corresponding head hoid.
       auto maybe_fix_order = scan_object_snaps(hoid, snapset, snaps_getter);
       if (maybe_fix_order) {
         out_orders.push_back(std::move(*maybe_fix_order));
@@ -1805,9 +1812,11 @@ std::vector<snap_mapper_fix_t> ScrubBackend::scan_snaps(
 std::optional<snap_mapper_fix_t> ScrubBackend::scan_object_snaps(
   const hobject_t& hoid,
   const SnapSet& snapset,
-  SnapMapperAccessor& snaps_getter)
+  SnapMapReaderI& snaps_getter)
 {
-  // check and if necessary fix snap_mapper
+  using result_t = Scrub::SnapMapReaderI::result_t;
+  dout(15) << fmt::format("{}: obj:{} snapset:{}", __func__, hoid, snapset)
+          << dendl;
 
   auto p = snapset.clone_snaps.find(hoid.snap);
   if (p == snapset.clone_snaps.end()) {
@@ -1817,21 +1826,69 @@ std::optional<snap_mapper_fix_t> ScrubBackend::scan_object_snaps(
   }
   set<snapid_t> obj_snaps{p->second.begin(), p->second.end()};
 
-  set<snapid_t> cur_snaps;
-  int r = snaps_getter.get_snaps(hoid, &cur_snaps);
-  if (r != 0 && r != -ENOENT) {
-    derr << __func__ << ": get_snaps returned " << cpp_strerror(r) << dendl;
-    ceph_abort();
+  // clang-format off
+
+  // validate both that the mapper contains the correct snaps for the object
+  // and that it is internally consistent.
+  // possible outcomes:
+  //
+  // Error scenarios:
+  // - SnapMapper index of object snaps does not match that stored in head
+  //   object snapset attribute:
+  //    we should delete the snapmapper entry and re-add it.
+  // - no mapping found for the object's snaps:
+  //    we should add the missing mapper entries.
+  // - the snapmapper set for this object is internally inconsistent (e.g.
+  //    the OBJ_ entries do not match the SNA_ entries). We remove
+  //    whatever entries are there, and redo the DB content for this object.
+  //
+  // And
+  // There is the "happy path": cur_snaps == obj_snaps. Nothing to do there.
+
+  // clang-format on
+
+  auto cur_snaps = snaps_getter.get_snaps_check_consistency(hoid);
+  if (!cur_snaps) {
+    switch (auto e = cur_snaps.error(); e.code) {
+      case result_t::code_t::backend_error:
+       derr << __func__ << ": get_snaps returned "
+            << cpp_strerror(e.backend_error) << " for " << hoid << dendl;
+       ceph_abort();
+      case result_t::code_t::not_found:
+       dout(10) << __func__ << ": no snaps for " << hoid << ". Adding."
+                << dendl;
+       return snap_mapper_fix_t{snap_mapper_op_t::add, hoid, obj_snaps, {}};
+      case result_t::code_t::inconsistent:
+       dout(10) << __func__ << ": inconsistent snapmapper data for " << hoid
+                << ". Recreating." << dendl;
+       return snap_mapper_fix_t{
+         snap_mapper_op_t::overwrite, hoid, obj_snaps, {}};
+      default:
+       dout(10) << __func__ << ": error (" << cpp_strerror(e.backend_error)
+                << ") fetching snapmapper data for " << hoid << ". Recreating."
+                << dendl;
+       return snap_mapper_fix_t{
+         snap_mapper_op_t::overwrite, hoid, obj_snaps, {}};
+    }
+    __builtin_unreachable();
   }
-  if (r == -ENOENT || cur_snaps != obj_snaps) {
 
-    // add this object to the list of snapsets that needs fixing. Note
-    // that we also collect the existing (bogus) list, for logging purposes
-    snap_mapper_op_t fixing_op =
-      (r == -ENOENT ? snap_mapper_op_t::add : snap_mapper_op_t::update);
-    return snap_mapper_fix_t{fixing_op, hoid, obj_snaps, cur_snaps};
+  if (*cur_snaps == obj_snaps) {
+    dout(20) << fmt::format(
+                 "{}: {}: snapset match SnapMapper's ({})", __func__, hoid,
+                 obj_snaps)
+            << dendl;
+    return std::nullopt;
   }
-  return std::nullopt;
+
+  // add this object to the list of snapsets that needs fixing. Note
+  // that we also collect the existing (bogus) list, for logging purposes
+  dout(20) << fmt::format(
+               "{}: obj {}: was: {} updating to: {}", __func__, hoid,
+               *cur_snaps, obj_snaps)
+          << dendl;
+  return snap_mapper_fix_t{
+    snap_mapper_op_t::update, hoid, obj_snaps, *cur_snaps};
 }
 
 /*
index e7c2e6636dbb5af0070b810557989dd9527def3a..3da2ffd42266b05de8ba47a0c966bbd6ec1878ba 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "include/buffer.h"
 #include "common/map_cacher.hpp"
+#include "osd/osd_types_fmt.h"
 #include "osd/SnapMapper.h"
 #include "common/Cond.h"
 
@@ -777,3 +778,162 @@ TEST_F(SnapMapperTest, LegacyKeyConvertion) {
     ASSERT_EQ(converted_key, new_key);
 }
 
+/**
+ * 'DirectMapper' provides simple, controlled, interface to the underlying
+ * SnapMapper.
+ */
+class DirectMapper {
+public:
+  std::unique_ptr<PausyAsyncMap> driver{make_unique<PausyAsyncMap>()};
+  std::unique_ptr<SnapMapper> mapper;
+  uint32_t mask;
+  uint32_t bits;
+  ceph::mutex lock = ceph::make_mutex("lock");
+
+  DirectMapper(
+    uint32_t mask,
+    uint32_t bits)
+   : mapper(new SnapMapper(g_ceph_context, driver.get(), mask, bits, 0, shard_id_t(1))), 
+             mask(mask), bits(bits) {}
+
+  hobject_t random_hobject() {
+    return hobject_t(
+      random_string(1+(rand() % 16)),
+      random_string(1+(rand() % 16)),
+      snapid_t(rand() % 1000),
+      (rand() & ((~0)<<bits)) | (mask & ~((~0)<<bits)),
+      0, random_string(rand() % 16));
+  }
+
+  void create_object(const hobject_t& obj, const set<snapid_t> &snaps) {
+    std::lock_guard l{lock};
+      PausyAsyncMap::Transaction t;
+      mapper->add_oid(obj, snaps, &t);
+      driver->submit(&t);
+  }
+
+  std::pair<std::string, ceph::buffer::list> to_raw(
+    const std::pair<snapid_t, hobject_t> &to_map) {
+    return mapper->to_raw(to_map);
+  }
+
+  std::string to_legacy_raw_key(
+    const std::pair<snapid_t, hobject_t> &to_map) {
+    return mapper->to_legacy_raw_key(to_map);
+  }
+
+  std::string to_raw_key(
+    const std::pair<snapid_t, hobject_t> &to_map) {
+    return mapper->to_raw_key(to_map);
+  }
+
+  void shorten_mapping_key(snapid_t snap, const hobject_t &clone)
+  {
+    // calculate the relevant key
+    std::string k = mapper->to_raw_key(snap, clone);
+
+    // find the value for this key
+    map<string, bufferlist> kvmap;
+    auto r = mapper->backend.get_keys(set{k}, &kvmap);
+    ASSERT_GE(r, 0);
+
+    // replace the key with its shortened version
+    PausyAsyncMap::Transaction t;
+    mapper->backend.remove_keys(set{k}, &t);
+    auto short_k = k.substr(0, 10);
+    mapper->backend.set_keys(map<string, bufferlist>{{short_k, kvmap[k]}}, &t);
+    driver->submit(&t);
+    driver->flush();
+  }
+};
+
+class DirectMapperTest : public ::testing::Test {
+ public:
+  // ctor & initialization
+  DirectMapperTest() = default;
+  ~DirectMapperTest() = default;
+  void SetUp() override;
+  void TearDown() override;
+
+ protected:
+  std::unique_ptr<DirectMapper> direct;
+};
+
+void DirectMapperTest::SetUp()
+{
+  direct = std::make_unique<DirectMapper>(0, 0);
+}
+
+void DirectMapperTest::TearDown()
+{
+  direct->driver->stop();
+  direct->mapper.reset();
+  direct->driver.reset();
+}
+
+
+TEST_F(DirectMapperTest, BasciObject)
+{
+  auto obj = direct->random_hobject();
+  set<snapid_t> snaps{100, 200};
+  direct->create_object(obj, snaps);
+
+  // verify that the OBJ_ & SNA_ entries are there
+  auto osn1 = direct->mapper->get_snaps(obj);
+  ASSERT_EQ(snaps, osn1);
+  auto vsn1 = direct->mapper->get_snaps_check_consistency(obj);
+  ASSERT_EQ(snaps, vsn1);
+}
+
+TEST_F(DirectMapperTest, CorruptedSnaRecord)
+{
+  object_t base_name{"obj"};
+  std::string key{"key"};
+
+  hobject_t head{base_name, key, CEPH_NOSNAP, 0x17, 0, ""};
+  hobject_t cln1{base_name, key, 10, 0x17, 0, ""};
+  hobject_t cln2{base_name, key, 20, 0x17, 0, ""}; // the oldest version
+  set<snapid_t> head_snaps{400, 500};
+  set<snapid_t> cln1_snaps{300};
+  set<snapid_t> cln2_snaps{100, 200};
+
+  PausyAsyncMap::Transaction t;
+  direct->mapper->add_oid(head, head_snaps, &t);
+  direct->mapper->add_oid(cln1, cln1_snaps, &t);
+  direct->mapper->add_oid(cln2, cln2_snaps, &t);
+  direct->driver->submit(&t);
+  direct->driver->flush();
+
+  // verify that the OBJ_ & SNA_ entries are there
+  {
+    auto osn1 = direct->mapper->get_snaps(cln1);
+    EXPECT_EQ(cln1_snaps, osn1);
+    auto osn2 = direct->mapper->get_snaps(cln2);
+    EXPECT_EQ(cln2_snaps, osn2);
+    auto osnh = direct->mapper->get_snaps(head);
+    EXPECT_EQ(head_snaps, osnh);
+  }
+  {
+    auto vsn1 = direct->mapper->get_snaps_check_consistency(cln1);
+    EXPECT_EQ(cln1_snaps, vsn1);
+    auto vsn2 = direct->mapper->get_snaps_check_consistency(cln2);
+    EXPECT_EQ(cln2_snaps, vsn2);
+    auto vsnh = direct->mapper->get_snaps_check_consistency(head);
+    EXPECT_EQ(head_snaps, vsnh);
+  }
+
+  // corrupt the SNA_ entry for cln1
+  direct->shorten_mapping_key(300, cln1);
+  {
+    auto vsnh = direct->mapper->get_snaps_check_consistency(head);
+    EXPECT_EQ(head_snaps, vsnh);
+    auto vsn1 = direct->mapper->get_snaps(cln1);
+    EXPECT_EQ(cln1_snaps, vsn1);
+    auto osn1 = direct->mapper->get_snaps_check_consistency(cln1);
+    EXPECT_NE(cln1_snaps, osn1);
+    auto vsn2 = direct->mapper->get_snaps_check_consistency(cln2);
+    EXPECT_EQ(cln2_snaps, vsn2);
+  }
+}
+
+///\todo test the case of a corrupted OBJ_ entry