osd/: differentiate scheduler class for undersized/degraded vs data movement
author    Samuel Just <sjust@redhat.com>
          Thu, 6 Apr 2023 05:57:42 +0000 (22:57 -0700)
committer Sridhar Seshasayee <sseshasa@redhat.com>
          Mon, 8 May 2023 10:52:00 +0000 (16:22 +0530)
Recovery operations on pgs/objects that have fewer than the configured
number of copies should be treated more urgently than operations on
pgs/objects that simply need to be moved to a new location.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/PeeringState.h
src/osd/scheduler/OpSchedulerItem.h

diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h
index f4c9cd9badb7bb9d71bc0ed663eb4845bd908ae4..583499af899de8a13f51ea87adea6ee71f18a065 100644
--- a/src/osd/PeeringState.h
+++ b/src/osd/PeeringState.h
@@ -1569,11 +1569,43 @@ public:
   unsigned get_delete_priority();
 
 public:
+  /**
+   * recovery_msg_priority_t
+   *
+   * Defines priority values for use with recovery messages.  The values are
+   * chosen to be reasonable for wpq during upgrade scenarios, but are
+   * actually translated into a class in PGRecoveryMsg::get_scheduler_class()
+   */
+  enum recovery_msg_priority_t : int {
+    FORCED = 20,
+    UNDERSIZED = 15,
+    DEGRADED = 10,
+    BEST_EFFORT = 5
+  };
+
   /// get message priority for recovery messages
   int get_recovery_op_priority() const {
-    int64_t pri = 0;
-    pool.info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri);
-    return  pri > 0 ? pri : cct->_conf->osd_recovery_op_priority;
+    if (cct->_conf->osd_op_queue == "mclock_scheduler") {
+      /* For mclock, we use special priority values which will be
+       * translated into op classes within PGRecoveryMsg::get_scheduler_class
+       */
+      if (is_forced_recovery_or_backfill()) {
+       return recovery_msg_priority_t::FORCED;
+      } else if (is_undersized()) {
+       return recovery_msg_priority_t::UNDERSIZED;
+      } else if (is_degraded()) {
+       return recovery_msg_priority_t::DEGRADED;
+      } else {
+       return recovery_msg_priority_t::BEST_EFFORT;
+      }
+    } else {
+      /* For WeightedPriorityQueue, we use pool or osd config settings to
+       * statically set the priority for recovery messages.  This special
+       * handling should probably be removed after Reef */
+      int64_t pri = 0;
+      pool.info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri);
+      return pri > 0 ? pri : cct->_conf->osd_recovery_op_priority;
+    }
   }
 
 private:
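
To make the new ordering concrete, the following is a minimal, self-contained sketch of the mclock branch above. The pg_state_sketch struct and recovery_priority() are hypothetical stand-ins for the PG state predicates the real method calls (is_forced_recovery_or_backfill(), is_undersized(), is_degraded()); nothing below is code from the tree.

#include <cassert>

// Hypothetical stand-ins for the PG state predicates the real method calls.
struct pg_state_sketch {
  bool forced = false;      // is_forced_recovery_or_backfill()
  bool undersized = false;  // is_undersized()
  bool degraded = false;    // is_degraded()
};

// Values copied from the recovery_msg_priority_t enum added above.
enum recovery_msg_priority_t : int {
  FORCED = 20,
  UNDERSIZED = 15,
  DEGRADED = 10,
  BEST_EFFORT = 5
};

// Same cascade as the mclock branch of get_recovery_op_priority(): urgency
// falls from forced recovery down to plain data movement.
int recovery_priority(const pg_state_sketch &s) {
  if (s.forced)     return FORCED;
  if (s.undersized) return UNDERSIZED;
  if (s.degraded)   return DEGRADED;
  return BEST_EFFORT;
}

int main() {
  pg_state_sketch moving;   // PG only needs to be relocated
  assert(recovery_priority(moving) == BEST_EFFORT);

  pg_state_sketch missing_copies;   // undersized PGs are typically also degraded
  missing_copies.undersized = true;
  missing_copies.degraded = true;
  assert(recovery_priority(missing_copies) == UNDERSIZED);
  return 0;
}

The order of the checks is the point: an undersized PG (missing replicas outright) outranks one that is merely degraded, and both outrank plain data movement.
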
diff --git a/src/osd/scheduler/OpSchedulerItem.h b/src/osd/scheduler/OpSchedulerItem.h
index f5e739a318722b038e53bef634949262741ec9ca..d8339353bfb906d69d3db04c1ea35b1ce74b6935 100644
--- a/src/osd/scheduler/OpSchedulerItem.h
+++ b/src/osd/scheduler/OpSchedulerItem.h
@@ -194,6 +194,17 @@ protected:
   const spg_t& get_pgid() const {
     return pgid;
   }
+
+  static op_scheduler_class priority_to_scheduler_class(int priority) {
+    if (priority >= CEPH_MSG_PRIO_HIGH) {
+      return op_scheduler_class::immediate;
+    } else if (priority >= PeeringState::recovery_msg_priority_t::DEGRADED) {
+      return op_scheduler_class::background_recovery;
+    } else {
+      return op_scheduler_class::background_best_effort;
+    }
+  }
+
 public:
   explicit PGOpQueueable(spg_t pg) : pgid(pg) {}
   uint32_t get_queue_token() const final {
@@ -581,11 +592,7 @@ public:
   }
 
   op_scheduler_class get_scheduler_class() const final {
-    auto priority = op->get_req()->get_priority();
-    if (priority >= CEPH_MSG_PRIO_HIGH) {
-      return op_scheduler_class::immediate;
-    }
-    return op_scheduler_class::background_recovery;
+    return priority_to_scheduler_class(op->get_req()->get_priority());
   }
 
   void run(OSD *osd, OSDShard *sdata, PGRef& pg, ThreadPool::TPHandle &handle) final;
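
Likewise, here is a sketch of the thresholding done by priority_to_scheduler_class() above. The op_scheduler_class enum is reduced for illustration (the real one carries additional members, e.g. a client class), and CEPH_MSG_PRIO_HIGH is assumed to be 196 as defined in src/include/msgr.h; this is illustrative, not tree code.

#include <cassert>

// Reduced stand-in for Ceph's op_scheduler_class.
enum class op_scheduler_class {
  immediate,
  background_recovery,
  background_best_effort,
};

constexpr int CEPH_MSG_PRIO_HIGH = 196;  // assumed value, per src/include/msgr.h
constexpr int DEGRADED = 10;             // recovery_msg_priority_t::DEGRADED

// Mirrors the helper added above: anything at or above DEGRADED (i.e.
// FORCED, UNDERSIZED, DEGRADED) stays in the recovery class, while
// BEST_EFFORT (5) drops to background_best_effort.
op_scheduler_class priority_to_scheduler_class(int priority) {
  if (priority >= CEPH_MSG_PRIO_HIGH) {
    return op_scheduler_class::immediate;
  } else if (priority >= DEGRADED) {
    return op_scheduler_class::background_recovery;
  } else {
    return op_scheduler_class::background_best_effort;
  }
}

int main() {
  assert(priority_to_scheduler_class(20)  == op_scheduler_class::background_recovery);    // FORCED
  assert(priority_to_scheduler_class(15)  == op_scheduler_class::background_recovery);    // UNDERSIZED
  assert(priority_to_scheduler_class(5)   == op_scheduler_class::background_best_effort); // BEST_EFFORT
  assert(priority_to_scheduler_class(255) == op_scheduler_class::immediate);              // high-prio msg
  return 0;
}

Only BEST_EFFORT falls below the DEGRADED cutoff, which is the mechanical core of the commit: under mclock, plain data movement lands in background_best_effort while recovery of under-replicated objects keeps the background_recovery class.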