osd/scrub: separate between PG state flags and internal scrubber operation

author Ronen Friedman <rfriedma@redhat.com>

Mon, 10 May 2021 13:15:16 +0000 (16:15 +0300)

committer Satoru Takeuchi <satoru.takeuchi@gmail.com>

Thu, 22 Jul 2021 16:26:55 +0000 (16:26 +0000)
author Ronen Friedman <rfriedma@redhat.com>
Mon, 10 May 2021 13:15:16 +0000 (16:15 +0300)
committer Satoru Takeuchi <satoru.takeuchi@gmail.com>
Thu, 22 Jul 2021 16:26:55 +0000 (16:26 +0000)
diff --git a/src/osd/PrimaryLogScrub.cc b/src/osd/PrimaryLogScrub.cc

index 39a16a3a3166fcf4c1e005dd1976ff14ea5cc0c1..1a14dbcd63f3f738de10f1a4406430068faa8d0e 100644 (file)
--- a/src/osd/PrimaryLogScrub.cc
+++ b/src/osd/PrimaryLogScrub.cc
@@ -47,10 +47,6 @@ void PrimaryLogScrub::_scrub_finish()
            << " info stats: " << (info.stats.stats_invalid ? "invalid" : "valid")
            << dendl;
  
-  bool repair = state_test(PG_STATE_REPAIR);
-  bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
-  const char* mode = (repair ? "repair" : (deep_scrub ? "deep-scrub" : "scrub"));
-
    if (info.stats.stats_invalid) {
      m_pl_pg->recovery_state.update_stats([=](auto& history, auto& stats) {
        stats.stats = m_scrub_cstat;
@@ -62,7 +58,7 @@ void PrimaryLogScrub::_scrub_finish()
        m_pl_pg->agent_choose_mode();
    }
  
-  dout(10) << mode << " got " << m_scrub_cstat.sum.num_objects << "/"
+  dout(10) << m_mode_desc << " got " << m_scrub_cstat.sum.num_objects << "/"
            << info.stats.stats.sum.num_objects << " objects, "
            << m_scrub_cstat.sum.num_object_clones << "/"
            << info.stats.stats.sum.num_object_clones << " clones, "
@@ -100,7 +96,7 @@ void PrimaryLogScrub::_scrub_finish()
         !info.stats.manifest_stats_invalid) ||
        m_scrub_cstat.sum.num_whiteouts != info.stats.stats.sum.num_whiteouts ||
        m_scrub_cstat.sum.num_bytes != info.stats.stats.sum.num_bytes) {
-    m_osds->clog->error() << info.pgid << " " << mode << " : stat mismatch, got "
+    m_osds->clog->error() << info.pgid << " " << m_mode_desc << " : stat mismatch, got "
                           << m_scrub_cstat.sum.num_objects << "/"
                           << info.stats.stats.sum.num_objects << " objects, "
                           << m_scrub_cstat.sum.num_object_clones << "/"
@@ -125,7 +121,7 @@ void PrimaryLogScrub::_scrub_finish()
                           << " hit_set_archive bytes.";
      ++m_shallow_errors;
  
-    if (repair) {
+    if (m_is_repair) {
        ++m_fixed_count;
        m_pl_pg->recovery_state.update_stats([this](auto& history, auto& stats) {
         stats.stats = m_scrub_cstat;
@@ -142,7 +138,7 @@ void PrimaryLogScrub::_scrub_finish()
      }
    }
    // Clear object context cache to get repair information
-  if (repair)
+  if (m_is_repair)
      m_pl_pg->object_contexts.clear();
  }
  
@@ -157,15 +153,14 @@ void PrimaryLogScrub::log_missing(int missing,
                                   LogChannelRef clog,
                                   const spg_t& pgid,
                                   const char* func,
-                                 const char* mode,
                                   bool allow_incomplete_clones)
  {
    ceph_assert(head);
    if (allow_incomplete_clones) {
-    dout(20) << func << " " << mode << " " << pgid << " " << *head << " skipped "
+    dout(20) << func << " " << m_mode_desc << " " << pgid << " " << *head << " skipped "
              << missing << " clone(s) in cache tier" << dendl;
    } else {
-    clog->info() << mode << " " << pgid << " " << *head << " : " << missing
+    clog->info() << m_mode_desc << " " << pgid << " " << *head << " : " << missing
                  << " missing clone(s)";
    }
  }
@@ -174,7 +169,6 @@ int PrimaryLogScrub::process_clones_to(const std::optional<hobject_t>& head,
                                        const std::optional<SnapSet>& snapset,
                                        LogChannelRef clog,
                                        const spg_t& pgid,
-                                      const char* mode,
                                        bool allow_incomplete_clones,
                                        std::optional<snapid_t> target,
                                        vector<snapid_t>::reverse_iterator* curclone,
@@ -193,7 +187,7 @@ int PrimaryLogScrub::process_clones_to(const std::optional<hobject_t>& head,
      // skip higher-numbered clones in the list.
      if (!allow_incomplete_clones) {
        next_clone.snap = **curclone;
-      clog->error() << mode << " " << pgid << " " << *head << " : expected clone "
+      clog->error() << m_mode_desc << " " << pgid << " " << *head << " : expected clone "
                     << next_clone << " " << m_missing << " missing";
        ++m_shallow_errors;
        e.set_clone_missing(next_clone.snap);
@@ -239,10 +233,6 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
    const PGPool& pool = m_pl_pg->pool;
    bool allow_incomplete_clones = pool.info.allow_incomplete_clones();
  
-  bool repair = state_test(PG_STATE_REPAIR);
-  bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
-  const char* mode = (repair ? "repair" : (deep_scrub ? "deep-scrub" : "scrub"));
-
    std::optional<snapid_t> all_clones;  // Unspecified snapid_t or std::nullopt
  
    // traverse in reverse order.
@@ -274,7 +264,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
      // basic checks.
      if (p->second.attrs.count(OI_ATTR) == 0) {
        oi = std::nullopt;
-      m_osds->clog->error() << mode << " " << info.pgid << " " << soid << " : no '"
+      m_osds->clog->error() << m_mode_desc << " " << info.pgid << " " << soid << " : no '"
                             << OI_ATTR << "' attr";
        ++m_shallow_errors;
        soid_error.set_info_missing();
@@ -286,7 +276,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
         oi->decode(bv);
        } catch (ceph::buffer::error& e) {
         oi = std::nullopt;
-       m_osds->clog->error() << mode << " " << info.pgid << " " << soid
+       m_osds->clog->error() << m_mode_desc << " " << info.pgid << " " << soid
                               << " : can't decode '" << OI_ATTR << "' attr " << e.what();
         ++m_shallow_errors;
         soid_error.set_info_corrupted();
@@ -296,7 +286,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
  
      if (oi) {
        if (m_pl_pg->pgbackend->be_get_ondisk_size(oi->size) != p->second.size) {
-       m_osds->clog->error() << mode << " " << info.pgid << " " << soid
+       m_osds->clog->error() << m_mode_desc << " " << info.pgid << " " << soid
                               << " : on disk size (" << p->second.size
                               << ") does not match object info size (" << oi->size
                               << ") adjusted for ondisk to ("
@@ -305,7 +295,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
         ++m_shallow_errors;
        }
  
-      dout(20) << mode << "  " << soid << " " << *oi << dendl;
+      dout(20) << m_mode_desc << "  " << soid << " " << *oi << dendl;
  
        // A clone num_bytes will be added later when we have snapset
        if (!soid.is_snap()) {
@@ -332,7 +322,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
        // Expecting an object with snap for current head
        if (soid.has_snapset() || soid.get_head() != head->get_head()) {
  
-       dout(10) << __func__ << " " << mode << " " << info.pgid << " new object " << soid
+       dout(10) << __func__ << " " << m_mode_desc << " " << info.pgid << " new object " << soid
                  << " while processing " << *head << dendl;
  
         target = all_clones;
@@ -344,7 +334,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
        // Log any clones we were expecting to be there up to target
        // This will set missing, but will be a no-op if snap.soid == *curclone.
        missing +=
-       process_clones_to(head, snapset, m_osds->clog, info.pgid, mode,
+       process_clones_to(head, snapset, m_osds->clog, info.pgid,
                           allow_incomplete_clones, target, &curclone, head_error);
      }
  
@@ -364,10 +354,10 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
      if (!expected) {
        // If we couldn't read the head's snapset, just ignore clones
        if (head && !snapset) {
-       m_osds->clog->error() << mode << " " << info.pgid << " " << soid
+       m_osds->clog->error() << m_mode_desc << " " << info.pgid << " " << soid
                               << " : clone ignored due to missing snapset";
        } else {
-       m_osds->clog->error() << mode << " " << info.pgid << " " << soid
+       m_osds->clog->error() << m_mode_desc << " " << info.pgid << " " << soid
                               << " : is an unexpected clone";
        }
        ++m_shallow_errors;
@@ -383,7 +373,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
      if (soid.has_snapset()) {
  
        if (missing) {
-       log_missing(missing, head, m_osds->clog, info.pgid, __func__, mode,
+       log_missing(missing, head, m_osds->clog, info.pgid, __func__,
                     pool.info.allow_incomplete_clones());
        }
  
@@ -396,10 +386,10 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
        head_error = soid_error;
        soid_error_count = 0;
  
-      dout(20) << __func__ << " " << mode << " new head " << head << dendl;
+      dout(20) << __func__ << " " << m_mode_desc << " new head " << head << dendl;
  
        if (p->second.attrs.count(SS_ATTR) == 0) {
-       m_osds->clog->error() << mode << " " << info.pgid << " " << soid << " : no '"
+       m_osds->clog->error() << m_mode_desc << " " << info.pgid << " " << soid << " : no '"
                               << SS_ATTR << "' attr";
         ++m_shallow_errors;
         snapset = std::nullopt;
@@ -415,7 +405,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
         } catch (ceph::buffer::error& e) {
           snapset = std::nullopt;
           m_osds->clog->error()
-           << mode << " " << info.pgid << " " << soid << " : can't decode '" << SS_ATTR
+           << m_mode_desc << " " << info.pgid << " " << soid << " : can't decode '" << SS_ATTR
             << "' attr " << e.what();
           ++m_shallow_errors;
           head_error.set_snapset_corrupted();
@@ -430,7 +420,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
           dout(20) << "  snapset " << *snapset << dendl;
           if (snapset->seq == 0) {
             m_osds->clog->error()
-             << mode << " " << info.pgid << " " << soid << " : snaps.seq not set";
+             << m_mode_desc << " " << info.pgid << " " << soid << " : snaps.seq not set";
             ++m_shallow_errors;
             head_error.set_snapset_error();
           }
@@ -442,24 +432,24 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
        ceph_assert(snapset);
        ceph_assert(soid.snap == *curclone);
  
-      dout(20) << __func__ << " " << mode << " matched clone " << soid << dendl;
+      dout(20) << __func__ << " " << m_mode_desc << " matched clone " << soid << dendl;
  
        if (snapset->clone_size.count(soid.snap) == 0) {
-       m_osds->clog->error() << mode << " " << info.pgid << " " << soid
+       m_osds->clog->error() << m_mode_desc << " " << info.pgid << " " << soid
                               << " : is missing in clone_size";
         ++m_shallow_errors;
         soid_error.set_size_mismatch();
        } else {
         if (oi && oi->size != snapset->clone_size[soid.snap]) {
           m_osds->clog->error()
-           << mode << " " << info.pgid << " " << soid << " : size " << oi->size
+           << m_mode_desc << " " << info.pgid << " " << soid << " : size " << oi->size
             << " != clone_size " << snapset->clone_size[*curclone];
           ++m_shallow_errors;
           soid_error.set_size_mismatch();
         }
  
         if (snapset->clone_overlap.count(soid.snap) == 0) {
-         m_osds->clog->error() << mode << " " << info.pgid << " " << soid
+         m_osds->clog->error() << m_mode_desc << " " << info.pgid << " " << soid
                                 << " : is missing in clone_overlap";
           ++m_shallow_errors;
           soid_error.set_size_mismatch();
@@ -482,7 +472,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
           }
  
           if (bad_interval_set) {
-           m_osds->clog->error() << mode << " " << info.pgid << " " << soid
+           m_osds->clog->error() << m_mode_desc << " " << info.pgid << " " << soid
                                   << " : bad interval_set in clone_overlap";
             ++m_shallow_errors;
             soid_error.set_size_mismatch();
@@ -503,18 +493,18 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
    }
  
    if (doing_clones(snapset, curclone)) {
-    dout(10) << __func__ << " " << mode << " " << info.pgid
+    dout(10) << __func__ << " " << m_mode_desc << " " << info.pgid
              << " No more objects while processing " << *head << dendl;
  
      missing +=
-      process_clones_to(head, snapset, m_osds->clog, info.pgid, mode,
+      process_clones_to(head, snapset, m_osds->clog, info.pgid,
                         allow_incomplete_clones, all_clones, &curclone, head_error);
    }
  
    // There could be missing found by the test above or even
    // before dropping out of the loop for the last head.
    if (missing) {
-    log_missing(missing, head, m_osds->clog, info.pgid, __func__, mode,
+    log_missing(missing, head, m_osds->clog, info.pgid, __func__,
                 allow_incomplete_clones);
    }
    if (head && (head_error.errors || soid_error_count))
@@ -529,12 +519,12 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
  
      ObjectContextRef obc = m_pl_pg->get_object_context(p->first, false);
      if (!obc) {
-      m_osds->clog->error() << info.pgid << " " << mode
+      m_osds->clog->error() << info.pgid << " " << m_mode_desc
                             << " cannot get object context for object " << p->first;
        continue;
      }
      if (obc->obs.oi.soid != p->first) {
-      m_osds->clog->error() << info.pgid << " " << mode << " " << p->first
+      m_osds->clog->error() << info.pgid << " " << m_mode_desc << " " << p->first
                             << " : object has a valid oi attr with a mismatched name, "
                             << " obc->obs.oi.soid: " << obc->obs.oi.soid;
        continue;
@@ -565,7 +555,7 @@ void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap,
      m_pl_pg->simple_opc_submit(std::move(ctx));
    }
  
-  dout(10) << __func__ << " (" << mode << ") finish" << dendl;
+  dout(10) << __func__ << " (" << m_mode_desc << ") finish" << dendl;
  }
  
  PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) : PgScrubber{pg}, m_pl_pg{pg} {}
diff --git a/src/osd/PrimaryLogScrub.h b/src/osd/PrimaryLogScrub.h

index d24fa09206555ce2f0c05640d568be65145c1ab7..2cf0c71c300f797d5c5beb848c0485cb58412278 100644 (file)
--- a/src/osd/PrimaryLogScrub.h
+++ b/src/osd/PrimaryLogScrub.h
@@ -62,14 +62,12 @@ class PrimaryLogScrub : public PgScrubber {
                    LogChannelRef clog,
                    const spg_t& pgid,
                    const char* func,
-                  const char* mode,
                    bool allow_incomplete_clones);
  
    int process_clones_to(const std::optional<hobject_t>& head,
                         const std::optional<SnapSet>& snapset,
                         LogChannelRef clog,
                         const spg_t& pgid,
-                       const char* mode,
                         bool allow_incomplete_clones,
                         std::optional<snapid_t> target,
                         std::vector<snapid_t>::reverse_iterator* curclone,
diff --git a/src/osd/pg_scrubber.cc b/src/osd/pg_scrubber.cc

index 9a5dba5085f78168461d3ab6118b310de73eba69..089a026b08e502417900348f6786cfe23cc686f9 100644 (file)
--- a/src/osd/pg_scrubber.cc
+++ b/src/osd/pg_scrubber.cc
@@ -333,6 +333,7 @@ void PgScrubber::reset_epoch(epoch_t epoch_queued)
    m_epoch_start = epoch_queued;
    m_needs_sleep = true;
    m_is_deep = state_test(PG_STATE_DEEP_SCRUB);
+  update_op_mode_text();
  }
  
  unsigned int PgScrubber::scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const
@@ -744,6 +745,16 @@ std::string PgScrubber::dump_awaited_maps() const
    return m_maps_status.dump();
  }
  
+void PgScrubber::update_op_mode_text()
+{
+  auto visible_repair = state_test(PG_STATE_REPAIR);
+  m_mode_desc = (visible_repair ? "repair"sv : (m_is_deep ? "deep-scrub"sv : "scrub"sv));
+
+  dout(10) << __func__ << ": repair: visible: " << (visible_repair ? "true" : "false")
+          << ", internal: " << (m_is_repair ? "true" : "false")
+          << ". Displayed: " << m_mode_desc << dendl;
+}
+
  void PgScrubber::_request_scrub_map(pg_shard_t replica,
                                     eversion_t version,
                                     hobject_t start,
@@ -1133,8 +1144,15 @@ void PgScrubber::set_op_parameters(requested_scrub_t& request)
      state_set(PG_STATE_DEEP_SCRUB);
    }
  
-  if (request.must_repair || m_flags.auto_repair) {
+  // m_is_repair is set for either 'must_repair' or 'repair-on-the-go' (i.e.
+  // deep-scrub with the auto_repair configuration flag set). The value of
+  // m_is_repair determines the scrubber behavior.
+  // PG_STATE_REPAIR, on the other hand, is only used for status reports
+  // (including the PG status as appearing in the logs).
+  m_is_repair = request.must_repair || m_flags.auto_repair;
+  if (request.must_repair) {
      state_set(PG_STATE_REPAIR);
+    // not calling update_op_mode_text() yet, as m_is_deep is not set yet
    }
  
    // the publishing here seems to be required for tests synchronization
@@ -1191,7 +1209,7 @@ void PgScrubber::scrub_compare_maps()
      ss.clear();
  
      m_pg->get_pgbackend()->be_compare_scrubmaps(
-      maps, master_set, state_test(PG_STATE_REPAIR), m_missing, m_inconsistent,
+      maps, master_set, m_is_repair, m_missing, m_inconsistent,
        authoritative, missing_digest, m_shallow_errors, m_deep_errors, m_store.get(),
        m_pg->info.pgid, m_pg->recovery_state.get_acting(), ss);
      dout(2) << ss.str() << dendl;
@@ -1228,7 +1246,7 @@ void PgScrubber::scrub_compare_maps()
  
    if (!m_store->empty()) {
  
-    if (state_test(PG_STATE_REPAIR)) {
+    if (m_is_repair) {
        dout(10) << __func__ << ": discarding scrub results" << dendl;
        m_store->flush(nullptr);
      } else {
@@ -1439,25 +1457,25 @@ void PgScrubber::unreserve_replicas()
  
  [[nodiscard]] bool PgScrubber::scrub_process_inconsistent()
  {
-  dout(10) << __func__ << ": checking authoritative" << dendl;
-
-  bool repair = state_test(PG_STATE_REPAIR);
-  const bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
-  const char* mode = (repair ? "repair" : (deep_scrub ? "deep-scrub" : "scrub"));
-  dout(20) << __func__ << " deep_scrub: " << deep_scrub << " m_is_deep: " << m_is_deep
-          << " repair: " << repair << dendl;
+  dout(10) << __func__ << ": checking authoritative (mode="
+          << m_mode_desc << ", auth remaining #: " << m_authoritative.size()
+          << ")" << dendl;
  
    // authoritative only store objects which are missing or inconsistent.
    if (!m_authoritative.empty()) {
  
      stringstream ss;
-    ss << m_pg->info.pgid << " " << mode << " " << m_missing.size() << " missing, "
+    ss << m_pg->info.pgid << " " << m_mode_desc << " " << m_missing.size() << " missing, "
         << m_inconsistent.size() << " inconsistent objects";
      dout(2) << ss.str() << dendl;
      m_osds->clog->error(ss);
  
-    if (repair) {
+    if (m_is_repair) {
        state_clear(PG_STATE_CLEAN);
+      // we know we have a problem, so it's OK to set the user-visible flag
+      // even if we only reached here via auto-repair
+      state_set(PG_STATE_REPAIR);
+      update_op_mode_text();
  
        for (const auto& [hobj, shrd_list] : m_authoritative) {
  
@@ -1475,7 +1493,7 @@ void PgScrubber::unreserve_replicas()
        }
      }
    }
-  return (!m_authoritative.empty() && repair);
+  return (!m_authoritative.empty() && m_is_repair);
  }
  
  /*
@@ -1492,24 +1510,21 @@ void PgScrubber::scrub_finish()
  
    // if the repair request comes from auto-repair and large number of errors,
    // we would like to cancel auto-repair
-
-  bool repair = state_test(PG_STATE_REPAIR);
-  if (repair && m_flags.auto_repair &&
+  if (m_is_repair && m_flags.auto_repair &&
        m_authoritative.size() > m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) {
  
      dout(10) << __func__ << " undoing the repair" << dendl;
-    state_clear(PG_STATE_REPAIR);
-    repair = false;
+    state_clear(PG_STATE_REPAIR); // not expected to be set, anyway
+    m_is_repair = false;
+    update_op_mode_text();
    }
  
-  bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
-  const char* mode = (repair ? "repair" : (deep_scrub ? "deep-scrub" : "scrub"));
    bool do_auto_scrub = false;
  
    // if a regular scrub had errors within the limit, do a deep scrub to auto repair
    if (m_flags.deep_scrub_on_error && m_authoritative.size() &&
        m_authoritative.size() <= m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) {
-    ceph_assert(!deep_scrub);
+    ceph_assert(!m_is_deep);
      do_auto_scrub = true;
      dout(15) << __func__ << " Try to auto repair after scrub errors" << dendl;
    }
@@ -1523,16 +1538,16 @@ void PgScrubber::scrub_finish()
  
    {
      stringstream oss;
-    oss << m_pg->info.pgid.pgid << " " << mode << " ";
+    oss << m_pg->info.pgid.pgid << " " << m_mode_desc << " ";
      int total_errors = m_shallow_errors + m_deep_errors;
      if (total_errors)
        oss << total_errors << " errors";
      else
        oss << "ok";
-    if (!deep_scrub && m_pg->info.stats.stats.sum.num_deep_scrub_errors)
+    if (!m_is_deep && m_pg->info.stats.stats.sum.num_deep_scrub_errors)
        oss << " ( " << m_pg->info.stats.stats.sum.num_deep_scrub_errors
           << " remaining deep scrub error details lost)";
-    if (repair)
+    if (m_is_repair)
        oss << ", " << m_fixed_count << " fixed";
      if (total_errors)
        m_osds->clog->error(oss);
@@ -1542,10 +1557,10 @@ void PgScrubber::scrub_finish()
  
    // Since we don't know which errors were fixed, we can only clear them
    // when every one has been fixed.
-  if (repair) {
+  if (m_is_repair) {
      if (m_fixed_count == m_shallow_errors + m_deep_errors) {
  
-      ceph_assert(deep_scrub);
+      ceph_assert(m_is_deep);
        m_shallow_errors = 0;
        m_deep_errors = 0;
        dout(20) << __func__ << " All may be fixed" << dendl;
@@ -1574,7 +1589,7 @@ void PgScrubber::scrub_finish()
      // finish up
      ObjectStore::Transaction t;
      m_pg->recovery_state.update_stats(
-      [this, deep_scrub](auto& history, auto& stats) {
+      [this](auto& history, auto& stats) {
         dout(10) << "m_pg->recovery_state.update_stats()" << dendl;
         utime_t now = ceph_clock_now();
         history.last_scrub = m_pg->recovery_state.get_info().last_update;
@@ -1584,7 +1599,7 @@ void PgScrubber::scrub_finish()
           history.last_deep_scrub_stamp = now;
         }
  
-       if (deep_scrub) {
+       if (m_is_deep) {
           if ((m_shallow_errors == 0) && (m_deep_errors == 0))
             history.last_clean_scrub_stamp = now;
           stats.stats.sum.num_shallow_scrub_errors = m_shallow_errors;
@@ -1628,7 +1643,9 @@ void PgScrubber::scrub_finish()
      m_pg->queue_peering_event(PGPeeringEventRef(std::make_shared<PGPeeringEvent>(
        get_osdmap_epoch(), get_osdmap_epoch(), PeeringState::DoRecovery())));
    } else {
+    m_is_repair = false;
      state_clear(PG_STATE_REPAIR);
+    update_op_mode_text();
    }
  
    cleanup_on_finish();
diff --git a/src/osd/pg_scrubber.h b/src/osd/pg_scrubber.h

index bbbac38ca7565f2fe06e0cc76fa9468cbf1f813e..176a00a23e9f926102d988363d29e4ccb3ee01e4 100644 (file)
--- a/src/osd/pg_scrubber.h
+++ b/src/osd/pg_scrubber.h
@@ -559,7 +559,7 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
    /// Maps from objects with errors to missing peers
    HobjToShardSetMapping m_missing;
  
- private:
+ protected:
    /**
     * 'm_is_deep' - is the running scrub a deep one?
     *
@@ -570,6 +570,33 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
     */
    bool m_is_deep{false};
  
+  /**
+   * If set: affects the backend & scrubber-backend functions called after all
+   * scrub maps are available.
+   *
+   * Replaces code that directly checks PG_STATE_REPAIR (which was meant to be
+   * a "user facing" status display only).
+   */
+  bool m_is_repair{false};
+
+  /**
+   * User-readable summary of the scrubber's current mode of operation. Used for
+   * both osd.*.log and the cluster log.
+   * One of:
+   *    "repair"
+   *    "deep-scrub"
+   *    "scrub"
+   *
+   * Note: based on PG_STATE_REPAIR, and not on m_is_repair. I.e. an auto_repair
+   * scrub is shown as "deep-scrub" and not as "repair" (until the first error
+   * is detected).
+   */
+  std::string_view m_mode_desc;
+
+  void update_op_mode_text();
+
+private:
+
    /**
     * initiate a deep-scrub after the current scrub ended with errors.
     */
author	Ronen Friedman <rfriedma@redhat.com>
	Mon, 10 May 2021 13:15:16 +0000 (16:15 +0300)
committer	Satoru Takeuchi <satoru.takeuchi@gmail.com>
	Thu, 22 Jul 2021 16:26:55 +0000 (16:26 +0000)
src/osd/PrimaryLogScrub.cc		patch \| blob \| history
src/osd/PrimaryLogScrub.h		patch \| blob \| history
src/osd/pg_scrubber.cc		patch \| blob \| history
src/osd/pg_scrubber.h		patch \| blob \| history