osd/scrub: make the scrubber-be the sole owner of the authoritative set

author Ronen Friedman <rfriedma@redhat.com>

Mon, 31 Jan 2022 13:25:58 +0000 (13:25 +0000)

committer Ronen Friedman <rfriedma@redhat.com>

Thu, 24 Feb 2022 14:26:37 +0000 (14:26 +0000)
author Ronen Friedman <rfriedma@redhat.com>
Mon, 31 Jan 2022 13:25:58 +0000 (13:25 +0000)
committer Ronen Friedman <rfriedma@redhat.com>
Thu, 24 Feb 2022 14:26:37 +0000 (14:26 +0000)
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc

index 59db35e0910ad4ecc6bf316882aef065c56ae8dd..f1f0e295be0a08ae51d7d11d226b99b1807e8662 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -1626,7 +1626,8 @@ void PgScrubber::scrub_finish()
    // if the repair request comes from auto-repair and large number of errors,
    // we would like to cancel auto-repair
    if (m_is_repair && m_flags.auto_repair &&
-      m_authoritative.size() > m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) {
+      m_be->authoritative_peers_count() >
+        static_cast<int>(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) {
  
      dout(10) << __func__ << " undoing the repair" << dendl;
      state_clear(PG_STATE_REPAIR); // not expected to be set, anyway
@@ -1636,10 +1637,12 @@ void PgScrubber::scrub_finish()
  
    m_be->update_repair_status(m_is_repair);
  
-  // if a regular scrub had errors within the limit, do a deep scrub to auto repair
+  // if a regular scrub had errors within the limit, do a deep scrub to auto
+  // repair
    bool do_auto_scrub = false;
-  if (m_flags.deep_scrub_on_error && !m_authoritative.empty() &&
-      m_authoritative.size() <= m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) {
+  if (m_flags.deep_scrub_on_error && m_be->authoritative_peers_count() &&
+      m_be->authoritative_peers_count() <=
+        static_cast<int>(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) {
      ceph_assert(!m_is_deep);
      do_auto_scrub = true;
      dout(15) << __func__ << " Try to auto repair after scrub errors" << dendl;
@@ -1650,9 +1653,34 @@ void PgScrubber::scrub_finish()
    // type-specific finish (can tally more errors)
    _scrub_finish();
  
+  /// \todo fix the relevant scrub test so that we would not need the extra log
+  /// line here (even if the following 'if' is false)
+
+  if (m_be->authoritative_peers_count()) {
+
+    auto err_msg = fmt::format("{} {} {} missing, {} inconsistent objects",
+                               m_pg->info.pgid,
+                               m_mode_desc,
+                               m_be->m_missing.size(),
+                               m_be->m_inconsistent.size());
+
+    dout(2) << err_msg << dendl;
+    m_osds->clog->error() << fmt::to_string(err_msg);
+  }
+
    // note that the PG_STATE_REPAIR might have changed above
-  m_fixed_count += m_be->scrub_process_inconsistent();
-  bool has_error = !m_authoritative.empty() && m_is_repair;
+  if (m_be->authoritative_peers_count() && m_is_repair) {
+
+    state_clear(PG_STATE_CLEAN);
+    // we know we have a problem, so it's OK to set the user-visible flag
+    // even if we only reached here via auto-repair
+    state_set(PG_STATE_REPAIR);
+    update_op_mode_text();
+    m_be->update_repair_status(true);
+    m_fixed_count += m_be->scrub_process_inconsistent();
+  }
+
+  bool has_error = (m_be->authoritative_peers_count() > 0) && m_is_repair;
  
    {
      stringstream oss;
@@ -2103,7 +2131,6 @@ void PgScrubber::reset_internal_state()
  
    run_callbacks();
  
-  m_authoritative.clear();
    num_digest_updates_pending = 0;
    m_primary_scrubmap_pos.reset();
    replica_scrubmap = ScrubMap{};
diff --git a/src/osd/scrubber/scrub_backend.cc b/src/osd/scrubber/scrub_backend.cc

index d19c39782cab9c0e633a0d0f4c3e487cf9373d62..d8d00cbed0a7e73be03770606cc5372c88a0daa8 100644 (file)
--- a/src/osd/scrubber/scrub_backend.cc
+++ b/src/osd/scrubber/scrub_backend.cc
@@ -268,8 +268,8 @@ void ScrubBackend::omap_checks()
  
  /*
   * update_authoritative() updates:
- #
- *  - m_scrubber.m_authoritative: adds obj-> list of pairs < scrub-map, shard>
+ *
+ *  - m_auth_peers: adds obj-> list of pairs < scrub-map, shard>
   *
   *  - m_cleaned_meta_map: replaces [obj] entry with:
   *     the relevant object in the scrub-map of the "selected" (back-most) peer
@@ -284,20 +284,18 @@ void ScrubBackend::update_authoritative()
  
    compare_smaps();  // note: might cluster-log errors
  
-  /// \todo try replacing with algorithm-based code
-
-  // update the scrubber object's m_authoritative with the list of good
+  // update the session-wide m_auth_peers with the list of good
    // peers for each object (i.e. the ones that are in this_chunks's auth list)
    for (auto& [obj, peers] : this_chunk->authoritative) {
  
-    list<pair<ScrubMap::object, pg_shard_t>> good_peers;
+    auth_peers_t good_peers;
  
      for (auto& peer : peers) {
        good_peers.emplace_back(this_chunk->received_maps[peer].objects[obj],
                                peer);
      }
  
-    m_scrubber.m_authoritative.emplace(obj, good_peers);
+    m_auth_peers.emplace(obj, std::move(good_peers));
    }
  
    for (const auto& [obj, peers] : this_chunk->authoritative) {
@@ -360,13 +358,11 @@ int ScrubBackend::scrub_process_inconsistent()
                            __func__,
                            m_mode_desc,
                            m_repair,
-                          m_scrubber.m_authoritative.size())
+                          m_auth_peers.size())
             << dendl;
  
+  ceph_assert(!m_auth_peers.empty());
    // authoritative only store objects which are missing or inconsistent.
-  if (m_scrubber.m_authoritative.empty()) {
-    return 0;
-  }
  
    // some tests expect an error message that does not contain the __func__ and
    // PG:
@@ -847,7 +843,8 @@ std::optional<std::string> ScrubBackend::compare_obj_in_maps(const hobject_t& ho
        ++m_scrubber.m_shallow_errors;
      }
  
-    m_scrubber.m_store->add_object_error(ho.pool, object_error);
+    //m_scrubber.m_store->add_object_error(ho.pool, object_error);
+    this_chunk->m_inconsistent_objs.push_back(std::move(object_error));
      return fmt::format("{} soid {} : failed to pick suitable object info\n",
                         m_scrubber.m_pg_id.pgid,
                         ho);
@@ -1586,7 +1583,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map)
        }
        ++m_scrubber.m_shallow_errors;
        soid_error.set_headless();
-      m_scrubber.m_store->add_snap_error(pool.id, soid_error);
+      this_chunk->m_inconsistent_objs.push_back(std::move(soid_error));
        ++soid_error_count;
        if (head && soid.get_head() == head->get_head())
          head_error.set_clone(soid.snap);
@@ -1604,8 +1601,10 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map)
        }
  
        // Save previous head error information
-      if (head && (head_error.errors || soid_error_count))
-        m_scrubber.m_store->add_snap_error(pool.id, head_error);
+      if (head && (head_error.errors || soid_error_count)) {
+        this_chunk->m_inconsistent_objs.push_back(std::move(head_error));
+      }
+
        // Set this as a new head object
        head = soid;
        missing = 0;
diff --git a/src/osd/scrubber/scrub_backend.h b/src/osd/scrubber/scrub_backend.h

index c67a1a43accb2b998106409ffad26ec6e17fb394..6cdcb1e02f564f20a4f747b56a71a304f1da0c1d 100644 (file)
--- a/src/osd/scrubber/scrub_backend.h
+++ b/src/osd/scrubber/scrub_backend.h
@@ -352,6 +352,8 @@ class ScrubBackend {
  
    const omap_stat_t& this_scrub_omapstats() const { return m_omap_stats; }
  
+  int authoritative_peers_count() const { return m_auth_peers.size(); };
+
    std::ostream& logger_prefix(std::ostream* _dout, const ScrubBackend* t);
  
   private:
author	Ronen Friedman <rfriedma@redhat.com>
	Mon, 31 Jan 2022 13:25:58 +0000 (13:25 +0000)
committer	Ronen Friedman <rfriedma@redhat.com>
	Thu, 24 Feb 2022 14:26:37 +0000 (14:26 +0000)
src/osd/scrubber/pg_scrubber.cc		patch \| blob \| history
src/osd/scrubber/scrub_backend.cc		patch \| blob \| history
src/osd/scrubber/scrub_backend.h		patch \| blob \| history