]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: make the scrubber-be the sole owner of the authoritative set
authorRonen Friedman <rfriedma@redhat.com>
Mon, 31 Jan 2022 13:25:58 +0000 (13:25 +0000)
committerRonen Friedman <rfriedma@redhat.com>
Thu, 24 Feb 2022 14:26:37 +0000 (14:26 +0000)
simplifying the interfaces used by the scrubber backend.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/scrub_backend.cc
src/osd/scrubber/scrub_backend.h

index 59db35e0910ad4ecc6bf316882aef065c56ae8dd..f1f0e295be0a08ae51d7d11d226b99b1807e8662 100644 (file)
@@ -1626,7 +1626,8 @@ void PgScrubber::scrub_finish()
   // if the repair request comes from auto-repair and large number of errors,
   // we would like to cancel auto-repair
   if (m_is_repair && m_flags.auto_repair &&
-      m_authoritative.size() > m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) {
+      m_be->authoritative_peers_count() >
+        static_cast<int>(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) {
 
     dout(10) << __func__ << " undoing the repair" << dendl;
     state_clear(PG_STATE_REPAIR); // not expected to be set, anyway
@@ -1636,10 +1637,12 @@ void PgScrubber::scrub_finish()
 
   m_be->update_repair_status(m_is_repair);
 
-  // if a regular scrub had errors within the limit, do a deep scrub to auto repair
+  // if a regular scrub had errors within the limit, do a deep scrub to auto
+  // repair
   bool do_auto_scrub = false;
-  if (m_flags.deep_scrub_on_error && !m_authoritative.empty() &&
-      m_authoritative.size() <= m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) {
+  if (m_flags.deep_scrub_on_error && m_be->authoritative_peers_count() &&
+      m_be->authoritative_peers_count() <=
+        static_cast<int>(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) {
     ceph_assert(!m_is_deep);
     do_auto_scrub = true;
     dout(15) << __func__ << " Try to auto repair after scrub errors" << dendl;
@@ -1650,9 +1653,34 @@ void PgScrubber::scrub_finish()
   // type-specific finish (can tally more errors)
   _scrub_finish();
 
+  /// \todo fix the relevant scrub test so that we would not need the extra log
+  /// line here (even if the following 'if' is false)
+
+  if (m_be->authoritative_peers_count()) {
+
+    auto err_msg = fmt::format("{} {} {} missing, {} inconsistent objects",
+                               m_pg->info.pgid,
+                               m_mode_desc,
+                               m_be->m_missing.size(),
+                               m_be->m_inconsistent.size());
+
+    dout(2) << err_msg << dendl;
+    m_osds->clog->error() << fmt::to_string(err_msg);
+  }
+
   // note that the PG_STATE_REPAIR might have changed above
-  m_fixed_count += m_be->scrub_process_inconsistent();
-  bool has_error = !m_authoritative.empty() && m_is_repair;
+  if (m_be->authoritative_peers_count() && m_is_repair) {
+
+    state_clear(PG_STATE_CLEAN);
+    // we know we have a problem, so it's OK to set the user-visible flag
+    // even if we only reached here via auto-repair
+    state_set(PG_STATE_REPAIR);
+    update_op_mode_text();
+    m_be->update_repair_status(true);
+    m_fixed_count += m_be->scrub_process_inconsistent();
+  }
+
+  bool has_error = (m_be->authoritative_peers_count() > 0) && m_is_repair;
 
   {
     stringstream oss;
@@ -2103,7 +2131,6 @@ void PgScrubber::reset_internal_state()
 
   run_callbacks();
 
-  m_authoritative.clear();
   num_digest_updates_pending = 0;
   m_primary_scrubmap_pos.reset();
   replica_scrubmap = ScrubMap{};
index d19c39782cab9c0e633a0d0f4c3e487cf9373d62..d8d00cbed0a7e73be03770606cc5372c88a0daa8 100644 (file)
@@ -268,8 +268,8 @@ void ScrubBackend::omap_checks()
 
 /*
  * update_authoritative() updates:
- #
- *  - m_scrubber.m_authoritative: adds obj-> list of pairs < scrub-map, shard>
+ *
+ *  - m_auth_peers: adds obj-> list of pairs < scrub-map, shard>
  *
  *  - m_cleaned_meta_map: replaces [obj] entry with:
  *     the relevant object in the scrub-map of the "selected" (back-most) peer
@@ -284,20 +284,18 @@ void ScrubBackend::update_authoritative()
 
   compare_smaps();  // note: might cluster-log errors
 
-  /// \todo try replacing with algorithm-based code
-
-  // update the scrubber object's m_authoritative with the list of good
+  // update the session-wide m_auth_peers with the list of good
   // peers for each object (i.e. the ones that are in this_chunks's auth list)
   for (auto& [obj, peers] : this_chunk->authoritative) {
 
-    list<pair<ScrubMap::object, pg_shard_t>> good_peers;
+    auth_peers_t good_peers;
 
     for (auto& peer : peers) {
       good_peers.emplace_back(this_chunk->received_maps[peer].objects[obj],
                               peer);
     }
 
-    m_scrubber.m_authoritative.emplace(obj, good_peers);
+    m_auth_peers.emplace(obj, std::move(good_peers));
   }
 
   for (const auto& [obj, peers] : this_chunk->authoritative) {
@@ -360,13 +358,11 @@ int ScrubBackend::scrub_process_inconsistent()
                           __func__,
                           m_mode_desc,
                           m_repair,
-                          m_scrubber.m_authoritative.size())
+                          m_auth_peers.size())
            << dendl;
 
+  ceph_assert(!m_auth_peers.empty());
   // authoritative only store objects which are missing or inconsistent.
-  if (m_scrubber.m_authoritative.empty()) {
-    return 0;
-  }
 
   // some tests expect an error message that does not contain the __func__ and
   // PG:
@@ -847,7 +843,8 @@ std::optional<std::string> ScrubBackend::compare_obj_in_maps(const hobject_t& ho
       ++m_scrubber.m_shallow_errors;
     }
 
-    m_scrubber.m_store->add_object_error(ho.pool, object_error);
+    //m_scrubber.m_store->add_object_error(ho.pool, object_error);
+    this_chunk->m_inconsistent_objs.push_back(std::move(object_error));
     return fmt::format("{} soid {} : failed to pick suitable object info\n",
                        m_scrubber.m_pg_id.pgid,
                        ho);
@@ -1586,7 +1583,7 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map)
       }
       ++m_scrubber.m_shallow_errors;
       soid_error.set_headless();
-      m_scrubber.m_store->add_snap_error(pool.id, soid_error);
+      this_chunk->m_inconsistent_objs.push_back(std::move(soid_error));
       ++soid_error_count;
       if (head && soid.get_head() == head->get_head())
         head_error.set_clone(soid.snap);
@@ -1604,8 +1601,10 @@ void ScrubBackend::scrub_snapshot_metadata(ScrubMap& map)
       }
 
       // Save previous head error information
-      if (head && (head_error.errors || soid_error_count))
-        m_scrubber.m_store->add_snap_error(pool.id, head_error);
+      if (head && (head_error.errors || soid_error_count)) {
+        this_chunk->m_inconsistent_objs.push_back(std::move(head_error));
+      }
+
       // Set this as a new head object
       head = soid;
       missing = 0;
index c67a1a43accb2b998106409ffad26ec6e17fb394..6cdcb1e02f564f20a4f747b56a71a304f1da0c1d 100644 (file)
@@ -352,6 +352,8 @@ class ScrubBackend {
 
   const omap_stat_t& this_scrub_omapstats() const { return m_omap_stats; }
 
+  int authoritative_peers_count() const { return m_auth_peers.size(); };
+
   std::ostream& logger_prefix(std::ostream* _dout, const ScrubBackend* t);
 
  private: