scrubber/pg_scrubber: regularize message interval checks

author Samuel Just <sjust@redhat.com>

Thu, 9 Feb 2023 20:46:00 +0000 (12:46 -0800)

committer Samuel Just <sjust@redhat.com>

Wed, 12 Apr 2023 03:39:19 +0000 (20:39 -0700)
author Samuel Just <sjust@redhat.com>
Thu, 9 Feb 2023 20:46:00 +0000 (12:46 -0800)
committer Samuel Just <sjust@redhat.com>
Wed, 12 Apr 2023 03:39:19 +0000 (20:39 -0700)
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc

index 93ed71304182959a69181850e87b52a07331b14a..1ff95d19cfea64ad87998fa276610686c5011175 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -94,6 +94,19 @@ bool PgScrubber::check_interval(epoch_t epoch_to_verify) const
    return epoch_to_verify >= m_pg->get_same_interval_since();
  }
  
+bool PgScrubber::should_drop_message(OpRequestRef &op) const
+{
+  if (check_interval(op->sent_epoch)) {
+    return false;
+  } else {
+    dout(10) << __func__ <<  ": discarding message " << *(op->get_req())
+            << " from prior interval, epoch " << op->sent_epoch
+            << ".  Current history.same_interval_since: "
+             << m_pg->info.history.same_interval_since << dendl;
+    return true;
+  }
+}
+
  bool PgScrubber::is_message_relevant(epoch_t epoch_to_verify)
  {
    if (!m_active) {
@@ -1370,18 +1383,7 @@ void PgScrubber::replica_scrub_op(OpRequestRef op)
            << " Msg: map_epoch:" << msg->map_epoch
            << " min_epoch:" << msg->min_epoch << " deep?" << msg->deep << dendl;
  
-  // are we still processing a previous scrub-map request without noticing that
-  // the interval changed? won't see it here, but rather at the reservation
-  // stage.
-
-  if (msg->map_epoch < m_pg->info.history.same_interval_since) {
-    dout(10) << "replica_scrub_op discarding old replica_scrub from "
-            << msg->map_epoch << " < "
-            << m_pg->info.history.same_interval_since << dendl;
-
-    // is there a general sync issue? are we holding a stale reservation?
-    // not checking now - assuming we will actively react to interval change.
-
+  if (should_drop_message(op)) {
      return;
    }
  
@@ -1537,9 +1539,7 @@ void PgScrubber::map_from_replica(OpRequestRef op)
    auto m = op->get_req<MOSDRepScrubMap>();
    dout(15) << __func__ << " " << *m << dendl;
  
-  if (m->map_epoch < m_pg->info.history.same_interval_since) {
-    dout(10) << __func__ << " discarding old from " << m->map_epoch << " < "
-            << m_pg->info.history.same_interval_since << dendl;
+  if (should_drop_message(op)) {
      return;
    }
  
@@ -1571,13 +1571,17 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
  {
    dout(10) << __func__ << " " << *op->get_req() << dendl;
    op->mark_started();
-  auto request_ep = op->get_req<MOSDScrubReserve>()->get_map_epoch();
+  auto request_ep = op->sent_epoch;
    dout(20) << fmt::format("{}: request_ep:{} recovery:{}",
                           __func__,
                           request_ep,
                           m_osds->is_recovery_active())
            << dendl;
  
+  if (should_drop_message(op)) {
+    return;
+  }
+
    // Purely a debug check, PgScrubber::on_new_interval should have cleared this
    if (m_remote_osd_resource.has_value()) {
      epoch_t e = m_remote_osd_resource->get_reservation_epoch();
@@ -1587,16 +1591,6 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
      }
    }
  
-  if (request_ep < m_pg->get_same_interval_since()) {
-    // will not ack stale requests
-    dout(10) << fmt::format("{}: stale reservation (request ep{} < {}) denied",
-                           __func__,
-                           request_ep,
-                           m_pg->get_same_interval_since())
-            << dendl;
-    return;
-  }
-
    /* The primary may unilaterally restart the scrub process without notifying
     * replicas.  Unconditionally clear any existing state prior to handling
     * the new reservation. */
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h

index 5243927c372ee765f3d02275df3fd99c336e999b..c09cdc1d8239b4306dbc81706bed2acb6e172430 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -689,6 +689,14 @@ class PgScrubber : public ScrubPgIF,
  
    // -----     methods used to verify the relevance of incoming events:
  
+  /**
+   * should_drop_message
+   *
+   * Returns false if message was sent in the current epoch.  Otherwise,
+   * returns true and logs a debug message.
+   */
+  bool should_drop_message(OpRequestRef &op) const;
+
    /**
     *  is the incoming event still relevant and should be forwarded to the FSM?
     *
author	Samuel Just <sjust@redhat.com>
	Thu, 9 Feb 2023 20:46:00 +0000 (12:46 -0800)
committer	Samuel Just <sjust@redhat.com>
	Wed, 12 Apr 2023 03:39:19 +0000 (20:39 -0700)
src/osd/scrubber/pg_scrubber.cc		patch \| blob \| history
src/osd/scrubber/pg_scrubber.h		patch \| blob \| history