This is a collection of minor changes to make scrub compatible with the new (optimized) EC implementation.
Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
op.recovery_info.oi = op.obc->obs.oi;
}
- if (sinfo.require_hinfo()) {
+ if (sinfo.get_is_hinfo_required()) {
ECUtil::HashInfo hinfo(sinfo.get_k_plus_m());
if (op.obc->obs.oi.size > 0) {
ceph_assert(op.xattrs.count(ECUtil::get_hinfo_key()));
if (op.recovery_progress.first && op.obc) {
op.xattrs = op.obc->attr_cache;
- if (sinfo.require_hinfo()) {
+ if (sinfo.get_is_hinfo_required()) {
if (auto [r, attrs, size] = ecbackend->get_attrs_n_size_from_disk(
op.hoid);
r >= 0 || r == -ENOENT) {
o.read_error = true;
return 0;
}
- if (bl.length() % sinfo.get_chunk_size()) {
- dout(20) << __func__ << " " << poid << " got "
- << r << " on read, not chunk size " << sinfo.get_chunk_size() << " aligned"
- << dendl;
- o.read_error = true;
- return 0;
- }
if (r > 0) {
pos.data_hash << bl;
}
return -EINPROGRESS;
}
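+ /* Hinfo is not required, so partial overwrites must be in use. Partial
+  * overwrites do not support deep-scrub of the data yet, so simply report
+  * a fixed digest.
+  */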
+ if (!sinfo.get_is_hinfo_required()) {
+ o.digest = 0;
+ o.digest_present = true;
+ o.omap_digest = -1;
+ o.omap_digest_present = true;
+ return 0;
+ }
+
ECUtil::HashInfoRef hinfo = unstable_hashinfo_registry.get_hash_info(
poid, false, o.attrs, o.size);
if (!hinfo) {
o.read_error = true;
o.digest_present = false;
return 0;
- } else {
- if (!sinfo.supports_ec_overwrites()) {
- if (!hinfo->has_chunk_hash()) {
- dout(0) << "_scan_list " << poid << " got invalid hash info" << dendl;
- o.ec_size_mismatch = true;
- return 0;
- }
- if (hinfo->get_total_chunk_size() != (unsigned)pos.data_pos) {
- dout(0) << "_scan_list " << poid << " got incorrect size on read 0x"
- << std::hex << pos
- << " expected 0x" << hinfo->get_total_chunk_size() << std::dec
- << dendl;
- o.ec_size_mismatch = true;
- return 0;
- }
-
- if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) !=
- pos.data_hash.digest()) {
- dout(0) << "_scan_list " << poid << " got incorrect hash on read 0x"
- << std::hex << pos.data_hash.digest() << " != expected 0x"
- << hinfo->get_chunk_hash(get_parent()->whoami_shard().shard)
- << std::dec << dendl;
- o.ec_hash_mismatch = true;
- return 0;
- }
+ }
+ if (!hinfo->has_chunk_hash()) {
+ dout(0) << "_scan_list " << poid << " got invalid hash info" << dendl;
+ o.ec_size_mismatch = true;
+ return 0;
+ }
+ if (hinfo->get_total_chunk_size() != (unsigned)pos.data_pos) {
+ dout(0) << "_scan_list " << poid << " got incorrect size on read 0x"
+ << std::hex << pos
+ << " expected 0x" << hinfo->get_total_chunk_size() << std::dec
+ << dendl;
+ o.ec_size_mismatch = true;
+ return 0;
+ }
- /* We checked above that we match our own stored hash. We cannot
- * send a hash of the actual object, so instead we simply send
- * our locally stored hash of shard 0 on the assumption that if
- * we match our chunk hash and our recollection of the hash for
- * chunk 0 matches that of our peers, there is likely no corruption.
- */
- o.digest = hinfo->get_chunk_hash(shard_id_t(0));
- o.digest_present = true;
- } else {
- /* Hack! We must be using partial overwrites, and partial overwrites
- * don't support deep-scrub yet
- */
- o.digest = 0;
- o.digest_present = true;
- }
+ if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) !=
+ pos.data_hash.digest()) {
+ dout(0) << "_scan_list " << poid << " got incorrect hash on read 0x"
+ << std::hex << pos.data_hash.digest() << " != expected 0x"
+ << hinfo->get_chunk_hash(get_parent()->whoami_shard().shard)
+ << std::dec << dendl;
+ o.ec_hash_mismatch = true;
+ return 0;
}
+ /* We checked above that we match our own stored hash. We cannot
+ * send a hash of the actual object, so instead we simply send
+ * our locally stored hash of shard 0 on the assumption that if
+ * we match our chunk hash and our recollection of the hash for
+ * chunk 0 matches that of our peers, there is likely no corruption.
+ */
+ o.digest = hinfo->get_chunk_hash(shard_id_t(0));
+ o.digest_present = true;
o.omap_digest = -1;
o.omap_digest_present = true;
return 0;
return sinfo.object_size_to_shard_size(size, shard);
}
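+ // A non-primary shard can never become primary and may hold an older
+ // version of the object.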
+ bool get_is_nonprimary_shard(shard_id_t shard) const {
+ return sinfo.is_nonprimary_shard(shard);
+ }
+
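+ // True when EC hinfo is required (i.e. overwrites are not supported).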
+ bool get_is_hinfo_required() const {
+ return sinfo.get_is_hinfo_required();
+ }
+
/**
* ECReadPred
*
return legacy.object_size_to_shard_size(size);
// All shards are the same size.
}
+ bool get_is_nonprimary_shard(shard_id_t shard) const final {
+ if (is_optimized()) {
+ return optimized.get_is_nonprimary_shard(shard);
+ }
+ return false;
+ }
+ bool get_is_hinfo_required() const final {
+ if (is_optimized()) {
+ return optimized.get_is_hinfo_required();
+ }
+ return true;
+ }
+ bool get_is_ec_optimized() const final {
+ return is_optimized();
+ }
};
ErasureCodeInterface::FLAG_EC_PLUGIN_REQUIRE_SUB_CHUNKS) != 0;
}
- bool require_hinfo() const {
+ bool get_is_hinfo_required() const {
return !supports_ec_overwrites();
}
return !waiting_for_unreadable_object.empty();
}
+ bool get_is_nonprimary_shard(const pg_shard_t &shard) const final
+ {
+ return get_pgbackend()->get_is_nonprimary_shard(shard.shard);
+ }
+
+ bool get_is_hinfo_required() const final
+ {
+ return get_pgbackend()->get_is_hinfo_required();
+ }
+
+ bool get_is_ec_optimized() const final {
+ return get_pgbackend()->get_is_ec_optimized();
+ }
+
static void set_last_scrub_stamp(
utime_t t, pg_history_t &history, pg_stat_t &stats) {
stats.last_scrub_stamp = t;
}
uint64_t logical_to_ondisk_size(uint64_t logical_size,
- int8_t shard_id) const final {
+ shard_id_t shard_id) const final {
return get_pgbackend()->be_get_ondisk_size(logical_size, shard_id_t(shard_id));
}
};
virtual int get_ec_stripe_chunk_size() const { return 0; };
virtual uint64_t object_size_to_shard_size(const uint64_t size, shard_id_t shard) const { return size; };
virtual void dump_recovery_info(ceph::Formatter *f) const = 0;
+ virtual bool get_is_nonprimary_shard(shard_id_t shard) const {
+ return false; // Only EC has nonprimary shards.
+ };
+ virtual bool get_is_hinfo_required() const {
+ return false; // Only EC can have hinfo.
+ }
+ virtual bool get_is_ec_optimized() const {
+ return false; // Only EC can be EC optimized.
+ }
private:
std::set<hobject_t> temp_contents;
<< " " << oi.alloc_hint_flags << "]";
if (oi.has_manifest())
out << " " << oi.manifest;
+ if (!oi.shard_versions.empty())
+ out << " shard_versions=" << oi.shard_versions;
out << ")";
return out;
}
clear_omap_digest();
}
+ eversion_t get_version_for_shard(shard_id_t shard) const {
+ auto iter = shard_versions.find(shard);
+
+ // If the shard is not present in shard_versions, its version is the same
+ // as this object's version.
+ if (iter == shard_versions.end()) {
+ return version;
+ }
+ // Otherwise, shard_versions is fully populated, so return the version
+ // recorded for this shard.
+ return iter->second;
+ }
+
void encode(ceph::buffer::list& bl, uint64_t features) const;
void decode(ceph::buffer::list::const_iterator& bl);
void decode(const ceph::buffer::list& bl) {
}
uint64_t ScrubBackend::logical_to_ondisk_size(uint64_t logical_size,
- int8_t shard_id) const
+ shard_id_t shard_id) const
{
return m_pg.logical_to_ondisk_size(logical_size, shard_id);
}
// ok, do the pg-type specific scrubbing
// (Validates consistency of the object info and snap sets)
- scrub_snapshot_metadata(for_meta_scrub);
+ scrub_snapshot_metadata(for_meta_scrub, m_pg_whoami);
return objs_fix_list_t{std::move(this_chunk->m_inconsistent_objs),
scan_snaps(for_meta_scrub, snaps_getter)};
// do not emit the returned error message to the log
dout(15) << fmt::format("{}: {} not found on shard {}", __func__, ho, l)
<< dendl;
+ } else if (shard_ret.possible_auth == shard_as_auth_t::usable_t::not_usable_no_err) {
+ dout(20) << fmt::format("{}: skipping not_usable_no_err {} {} {}",
+ __func__,
+ l,
+ shard_ret.oi.version,
+ shard_ret.oi.soid)
+ << dendl;
} else {
dout(30) << fmt::format("{}: consider using {} srv: {} oi soid: {}",
// We won't pick an auth copy if the snapset is missing or won't decode.
ceph_assert(!obj.is_snapdir());
- if (obj.is_head()) {
+ if (obj.is_head() && !m_pg.get_is_nonprimary_shard(j_shard)) {
auto k = smap_obj.attrs.find(SS_ATTR);
if (dup_error_cond(err,
false,
}
}
- if (!m_is_replicated) {
+ if (m_pg.get_is_hinfo_required()) {
auto k = smap_obj.attrs.find(ECUtil::get_hinfo_key());
if (dup_error_cond(err,
false,
}
}
- uint64_t ondisk_size = logical_to_ondisk_size(oi.size, srd.shard.id);
+ uint64_t ondisk_size = logical_to_ondisk_size(oi.size, srd.shard);
if (test_error_cond(smap_obj.size != ondisk_size, shard_info,
&shard_info_wrapper::set_obj_size_info_mismatch)) {
ceph_assert(!err);
// note that the error text is made available to the caller, even
- // for a successful shard selection
- return shard_as_auth_t{oi, j, errstream.str(), digest};
+ // for a successful shard selection.
+ // Non-primary shards cannot be used as authoritative, but this is not
+ // considered a failure.
+ return shard_as_auth_t{oi, j, errstream.str(), digest,
+ m_pg.get_is_nonprimary_shard(j_shard)};
}
// re-implementation of PGBackend::be_compare_scrubmaps()
if (!can_bl.contents_equal(auth_bl)) {
object_info_t oi(can_bl);
fmt::format_to(std::back_inserter(out),
- "{}object info inconsistent auth_io={} candidate_oi={}",
+ "{}object info inconsistent auth_oi={} candidate_oi={}",
sep(error), auth_oi, oi);
obj_result.set_object_info_inconsistency();
}
if (oi.version != auth_oi.get_version_for_shard(shard.shard) ||
oi.size != auth_oi.size) {
fmt::format_to(std::back_inserter(out),
- "{}object info version incorrect auth_io={} candidate_oi={}",
+ "{}object info version incorrect auth_oi={} candidate_oi={}",
sep(error), auth_oi, oi);
obj_result.set_object_info_inconsistency();
}
}
}
- if (has_snapset) {
+ if (has_snapset && !m_pg.get_is_nonprimary_shard(shard)) {
if (!shard_result.has_snapset_missing() &&
!shard_result.has_snapset_corrupted()) {
// ------------------------------------------------------------------------
- if (!m_is_replicated) {
+ // Only EC can have hinfo, so the if statement below is only entered for
+ // EC objects.
+ if (m_pg.get_is_hinfo_required() && !m_pg.get_is_nonprimary_shard(shard)) {
if (!shard_result.has_hinfo_missing() &&
!shard_result.has_hinfo_corrupted()) {
// ------------------------------------------------------------------------
// sizes:
- // NOTE: This will be fixed as a later PR as part of the optimized EC work.
- uint64_t oi_size = logical_to_ondisk_size(auth_oi.size, 0);
+ uint64_t oi_size = logical_to_ondisk_size(auth_oi.size, shard.shard);
if (oi_size != candidate.size) {
fmt::format_to(std::back_inserter(out),
"{}size {} != size {} from auth oi {}",
shard_result.set_size_mismatch_info();
}
- if (auth.size != candidate.size) {
+ // In optimized EC, shards can have different sizes, so this test does not
+ // apply; the per-shard sizes have already been checked above.
+ if (!m_pg.get_is_ec_optimized() && auth.size != candidate.size) {
fmt::format_to(std::back_inserter(out),
"{}size {} != size {} from shard {}",
sep(error),
// ------------------------------------------------------------------------
// comparing the attributes:
+ // Other than OI, only potential primaries have the attributes.
- for (const auto& [k, v] : auth.attrs) {
- if (k == OI_ATTR || k[0] != '_') {
- // We check system keys separately
- continue;
- }
+ if (!m_pg.get_is_nonprimary_shard(shard)) {
+ for (const auto& [k, v] : auth.attrs) {
+ if (k == OI_ATTR || k[0] != '_') {
+ // We check system keys separately
+ continue;
+ }
- auto cand = candidate.attrs.find(k);
- if (cand == candidate.attrs.end()) {
- fmt::format_to(std::back_inserter(out),
- "{}attr name mismatch '{}'",
- sep(error),
- k);
- obj_result.set_attr_name_mismatch();
- } else if (!cand->second.contents_equal(v)) {
- fmt::format_to(std::back_inserter(out),
- "{}attr value mismatch '{}'",
- sep(error),
- k);
- obj_result.set_attr_value_mismatch();
+ auto cand = candidate.attrs.find(k);
+ if (cand == candidate.attrs.end()) {
+ fmt::format_to(std::back_inserter(out),
+ "{}attr name mismatch '{}'",
+ sep(error),
+ k);
+ obj_result.set_attr_name_mismatch();
+ } else if (!cand->second.contents_equal(v)) {
+ fmt::format_to(std::back_inserter(out),
+ "{}attr value mismatch '{}'",
+ sep(error),
+ k);
+ obj_result.set_attr_value_mismatch();
+ }
}
- }
- for (const auto& [k, v] : candidate.attrs) {
- if (k == OI_ATTR || k[0] != '_') {
- // We check system keys separately
- continue;
- }
+ for (const auto& [k, v] : candidate.attrs) {
+ if (k == OI_ATTR || k[0] != '_') {
+ // We check system keys separately
+ continue;
+ }
- auto in_auth = auth.attrs.find(k);
- if (in_auth == auth.attrs.end()) {
- fmt::format_to(std::back_inserter(out),
- "{}attr name mismatch '{}'",
- sep(error),
- k);
- obj_result.set_attr_name_mismatch();
+ auto in_auth = auth.attrs.find(k);
+ if (in_auth == auth.attrs.end()) {
+ fmt::format_to(std::back_inserter(out),
+ "{}attr name mismatch '{}'",
+ sep(error),
+ k);
+ obj_result.set_attr_name_mismatch();
+ }
}
}
* [Snapset clones 4]
* EOL obj4 snap 4, (expected)
*/
-void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map)
+void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map, const pg_shard_t &srd)
{
dout(10) << __func__ << " num stat obj "
<< m_pg.get_pg_info(ScrubberPasskey{}).stats.stats.sum.num_objects
}
if (oi) {
- // NOTE: Fix planned as part of the optimized EC work.
- if (logical_to_ondisk_size(oi->size, 0) != p->second.size) {
+ if (logical_to_ondisk_size(oi->size, srd.shard) != p->second.size) {
clog.error() << m_mode_desc << " " << m_pg_id << " " << soid
<< " : on disk size (" << p->second.size
<< ") does not match object info size (" << oi->size
<< ") adjusted for ondisk to ("
- << logical_to_ondisk_size(oi->size, 0) << ")";
+ << logical_to_ondisk_size(oi->size, srd.shard) << ")";
soid_error.set_size_mismatch();
this_chunk->m_error_counts.shallow_errors++;
}
struct shard_as_auth_t {
// note: 'not_found' differs from 'not_usable' in that 'not_found'
// does not carry an error message to be cluster-logged.
- enum class usable_t : uint8_t { not_usable, not_found, usable };
+ enum class usable_t : uint8_t { not_usable, not_found, usable, not_usable_no_err };
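+ // 'not_usable_no_err' marks a shard that cannot be used as auth (e.g. a
+ // non-primary EC shard) but is not treated as an error.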
// the ctor used when the shard should not be considered as auth
explicit shard_as_auth_t(std::string err_msg)
shard_as_auth_t(const object_info_t& anoi,
shard_to_scrubmap_t::iterator it,
std::string err_msg,
- std::optional<uint32_t> data_digest)
- : possible_auth{usable_t::usable}
+ std::optional<uint32_t> data_digest,
+ bool nonprimary_ec)
+ : possible_auth{nonprimary_ec ? usable_t::not_usable_no_err : usable_t::usable}
, error_text{err_msg}
, oi{anoi}
, auth_iter{it}
if (as_auth.possible_auth == shard_as_auth_t::usable_t::not_found) {
return fmt::format_to(ctx.out(), "{{shard-not-found}}");
}
+ if (as_auth.possible_auth == shard_as_auth_t::usable_t::not_usable_no_err) {
+ return fmt::format_to(ctx.out(),
+ "{{shard-not-usable-no-err:{}}}",
+ as_auth.error_text);
+ }
return fmt::format_to(ctx.out(),
"{{shard-usable: soid:{} {{txt:{}}} }}",
as_auth.oi.soid,
const spg_t m_pg_id;
std::vector<pg_shard_t> m_acting_but_me; // primary only
bool m_is_replicated{true};
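+ // Set when the pool has EC optimizations enabled.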
+ bool m_is_optimized_ec{false};
std::string_view m_mode_desc;
std::string m_formatted_id;
const PGPool& m_pool;
/**
* Validate consistency of the object info and snap sets.
*/
- void scrub_snapshot_metadata(ScrubMap& map);
+ void scrub_snapshot_metadata(ScrubMap& map, const pg_shard_t &srd);
/**
* Updates the "global" (i.e. - not 'per-chunk') databases:
// accessing the PG backend for this translation service
uint64_t logical_to_ondisk_size(uint64_t logical_size,
- int8_t shard_id) const;
+ shard_id_t shard_id) const;
};
namespace fmt {
// query the PG backend for the on-disk size of an object
virtual uint64_t logical_to_ondisk_size(uint64_t logical_size,
- int8_t shard_id) const = 0;
+ shard_id_t shard_id) const = 0;
// used to verify our "cleanliness" before scrubbing
virtual bool is_waiting_for_unreadable_object() const = 0;
+
+ // A non-primary shard is one which can never become primary. It may
+ // have an old version and cannot be considered authoritative.
+ virtual bool get_is_nonprimary_shard(const pg_shard_t &pg_shard) const = 0;
+
+ // hinfo attributes are not used for some EC configurations. Do not raise
+ // scrub errors for hinfo if it should not exist.
+ virtual bool get_is_hinfo_required() const = 0;
+
+ // If true, the EC optimisations have been enabled.
+ virtual bool get_is_ec_optimized() const = 0;
};
// defining a specific subset of performance counters. Each of the members
const pg_info_t& get_pg_info(ScrubberPasskey) const final { return m_info; }
uint64_t logical_to_ondisk_size(uint64_t logical_size,
- int8_t shard_id) const final
+ shard_id_t shard_id) const final
{
return logical_size;
}
std::shared_ptr<PGPool> m_pool;
pg_info_t& m_info;
pg_shard_t m_pshard;
+
+ bool get_is_nonprimary_shard(const pg_shard_t &pg_shard) const final
+ {
+ return get_is_ec_optimized() &&
+ m_pool->info.is_nonprimary_shard(pg_shard.shard);
+ }
+ bool get_is_hinfo_required() const final
+ {
+ return get_is_ec_optimized() &&
+ !m_pool->info.has_flag(m_pool->info.FLAG_EC_OVERWRITES);
+ }
+ bool get_is_ec_optimized() const final {
+ return m_pool->info.has_flag(m_pool->info.FLAG_EC_OPTIMIZATIONS);
+ }
};