git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/: differentiate priority for PGRecovery[Context]
author Samuel Just <sjust@redhat.com>
Thu, 6 Apr 2023 07:04:05 +0000 (00:04 -0700)
committer Sridhar Seshasayee <sseshasa@redhat.com>
Mon, 8 May 2023 09:16:25 +0000 (14:46 +0530)
PGs with degraded objects should be higher priority.

Signed-off-by: Samuel Just <sjust@redhat.com>
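
Note: the commit threads the recovery op priority (obtained from PeeringState::get_recovery_op_priority()) through OSDService's recovery queueing paths into the PGRecovery and PGRecoveryContext scheduler items, whose get_scheduler_class() now derives the scheduler class from that priority instead of returning a fixed op_scheduler_class::background_recovery. Roughly, the flow is:

  // Condensed sketch of the new priority flow (not verbatim source):
  //
  // PG::queue_recovery()
  //   -> OSDService::queue_for_recovery(pg, cost_per_object, priority)
  //     -> OSDService::_queue_for_recovery(...)   // wraps it in a PGRecovery item
  //       -> PGRecovery::get_scheduler_class()
  //            == priority_to_scheduler_class(priority)
  //
  // PrimaryLogPG::schedule_recovery_work()
  //   -> OSDService::queue_recovery_context(pg, c, cost, priority)
  //     -> PGRecoveryContext::get_scheduler_class()
  //          == priority_to_scheduler_class(priority)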
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc
src/osd/PrimaryLogPG.cc
src/osd/scheduler/OpSchedulerItem.cc
src/osd/scheduler/OpSchedulerItem.h

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index de9ff3e5fa42af0951dd564093d5935a640682b8..68531133203c8e2da65ad1b92c0846b9146b7c66 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1640,7 +1640,8 @@ void OSDService::enqueue_front(OpSchedulerItem&& qi)
 void OSDService::queue_recovery_context(
   PG *pg,
   GenContext<ThreadPool::TPHandle&> *c,
-  uint64_t cost)
+  uint64_t cost,
+  int priority)
 {
   epoch_t e = get_osdmap_epoch();
 
@@ -1660,7 +1661,7 @@ void OSDService::queue_recovery_context(
   enqueue_back(
     OpSchedulerItem(
       unique_ptr<OpSchedulerItem::OpQueueable>(
-       new PGRecoveryContext(pg->get_pgid(), c, e)),
+       new PGRecoveryContext(pg->get_pgid(), c, e, priority)),
       cost_for_queue,
       cct->_conf->osd_recovery_priority,
       ceph_clock_now(),
@@ -2005,7 +2006,8 @@ void OSDService::_queue_for_recovery(
        new PGRecovery(
          p.pg->get_pgid(),
          p.epoch_queued,
-          reserved_pushes)),
+          reserved_pushes,
+         p.priority)),
       cost_for_queue,
       cct->_conf->osd_recovery_priority,
       ceph_clock_now(),
@@ -9670,7 +9672,7 @@ unsigned OSDService::get_target_pg_log_entries() const
 }
 
 void OSD::do_recovery(
-  PG *pg, epoch_t queued, uint64_t reserved_pushes,
+  PG *pg, epoch_t queued, uint64_t reserved_pushes, int priority,
   ThreadPool::TPHandle &handle)
 {
   uint64_t started = 0;
@@ -9687,13 +9689,14 @@ void OSD::do_recovery(
     std::lock_guard l(service.sleep_lock);
     if (recovery_sleep > 0 && service.recovery_needs_sleep) {
       PGRef pgref(pg);
-      auto recovery_requeue_callback = new LambdaContext([this, pgref, queued, reserved_pushes](int r) {
+      auto recovery_requeue_callback = new LambdaContext(
+       [this, pgref, queued, reserved_pushes, priority](int r) {
         dout(20) << "do_recovery wake up at "
                  << ceph_clock_now()
                 << ", re-queuing recovery" << dendl;
        std::lock_guard l(service.sleep_lock);
         service.recovery_needs_sleep = false;
-        service.queue_recovery_after_sleep(pgref.get(), queued, reserved_pushes);
+        service.queue_recovery_after_sleep(pgref.get(), queued, reserved_pushes, priority);
       });
 
       // This is true for the first recovery op and when the previous recovery op
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 8f205e5a55f722eadf004255b005b92a965bcec0..a1bb82f07b70153b7e24d079875ed6c5d2854c46 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -538,7 +538,8 @@ public:
   AsyncReserver<spg_t, Finisher> snap_reserver;
   void queue_recovery_context(PG *pg,
                               GenContext<ThreadPool::TPHandle&> *c,
-                              uint64_t cost);
+                              uint64_t cost,
+                             int priority);
   void queue_for_snap_trim(PG *pg);
   void queue_for_scrub(PG* pg, Scrub::scrub_prio_t with_priority);
 
@@ -618,6 +619,7 @@ private:
     const epoch_t epoch_queued;
     PGRef pg;
     const uint64_t cost_per_object;
+    const int priority;
   };
   std::list<pg_awaiting_throttle_t> awaiting_throttle;
 
@@ -680,25 +682,31 @@ public:
   unsigned get_target_pg_log_entries() const;
 
   // delayed pg activation
-  void queue_for_recovery(PG *pg, uint64_t cost_per_object) {
+  void queue_for_recovery(
+    PG *pg, uint64_t cost_per_object,
+    int priority) {
     std::lock_guard l(recovery_lock);
 
     if (pg->is_forced_recovery_or_backfill()) {
       awaiting_throttle.emplace_front(
         pg_awaiting_throttle_t{
-          pg->get_osdmap()->get_epoch(), pg, cost_per_object});
+          pg->get_osdmap()->get_epoch(), pg, cost_per_object, priority});
     } else {
       awaiting_throttle.emplace_back(
         pg_awaiting_throttle_t{
-          pg->get_osdmap()->get_epoch(), pg, cost_per_object});
+          pg->get_osdmap()->get_epoch(), pg, cost_per_object, priority});
     }
     _maybe_queue_recovery();
   }
-  void queue_recovery_after_sleep(PG *pg, epoch_t queued, uint64_t reserved_pushes) {
+  void queue_recovery_after_sleep(
+    PG *pg, epoch_t queued, uint64_t reserved_pushes,
+    int priority) {
     std::lock_guard l(recovery_lock);
     // Send cost as 1 in pg_awaiting_throttle_t below. The cost is ignored
     // as this path is only applicable for WeightedPriorityQueue scheduler.
-    _queue_for_recovery(pg_awaiting_throttle_t{queued, pg, 1}, reserved_pushes);
+    _queue_for_recovery(
+      pg_awaiting_throttle_t{queued, pg, 1, priority},
+      reserved_pushes);
   }
 
   void queue_check_readable(spg_t spgid,
@@ -1941,6 +1949,7 @@ protected:
 
   // -- pg recovery --
   void do_recovery(PG *pg, epoch_t epoch_queued, uint64_t pushes_reserved,
+                  int priority,
                   ThreadPool::TPHandle &handle);
 
 
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 667e9c106b28c08364163a3fb4bae3162eb21c36..3b3df466f7c9f0177ee99d2c613514ce13b1c62b 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -429,7 +429,9 @@ void PG::queue_recovery()
        1, // ensure objects is non-negative and non-zero
        info.stats.stats.sum.num_objects));
     uint64_t cost_per_object = std::max<uint64_t>(num_bytes / num_objects, 1);
-    osd->queue_for_recovery(this, cost_per_object);
+    osd->queue_for_recovery(
+      this, cost_per_object, recovery_state.get_recovery_op_priority()
+    );
   }
 }
 
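Note: the priority passed here comes from PeeringState::get_recovery_op_priority(), which is not part of this diff. A minimal sketch of how it plausibly ranks recovery work under mClock follows; the enum names and values are assumptions drawn from the surrounding series, not verbatim source:

  // Illustrative sketch only; the real logic lives in PeeringState and also
  // covers the WeightedPriorityQueue path via pool/osd config options.
  enum recovery_msg_priority_t : int {
    FORCED = 20,      // user forced recovery/backfill
    UNDERSIZED = 15,  // PG is missing whole replicas
    DEGRADED = 10,    // objects exist with fewer than desired copies
    BEST_EFFORT = 5   // e.g. misplaced-only recovery
  };

  int get_recovery_op_priority_sketch(const PG &pg) {
    if (pg.is_forced_recovery_or_backfill()) return FORCED;
    if (pg.is_undersized())                  return UNDERSIZED;
    if (pg.is_degraded())                    return DEGRADED;
    return BEST_EFFORT;
  }

Under such a ranking, PGs with degraded objects sort above best-effort recovery, which is what the commit message asks for.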
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index d1f6a4c52b336c8680b8febee2dddc8e45f217e8..63757e232d8b2f4218821a71e0a54e95e5d07ed0 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -528,7 +528,9 @@ void PrimaryLogPG::schedule_recovery_work(
   GenContext<ThreadPool::TPHandle&> *c,
   uint64_t cost)
 {
-  osd->queue_recovery_context(this, c, cost);
+  osd->queue_recovery_context(
+    this, c, cost,
+    recovery_state.get_recovery_op_priority());
 }
 
 void PrimaryLogPG::replica_clear_repop_obc(
diff --git a/src/osd/scheduler/OpSchedulerItem.cc b/src/osd/scheduler/OpSchedulerItem.cc
index 9f834e90778127c3a56f5563ea7daa87e9bb6c31..4eab93bd9c3506c1b8a7575c0d928c0cf9133051 100644
--- a/src/osd/scheduler/OpSchedulerItem.cc
+++ b/src/osd/scheduler/OpSchedulerItem.cc
@@ -225,7 +225,7 @@ void PGRecovery::run(
   PGRef& pg,
   ThreadPool::TPHandle &handle)
 {
-  osd->do_recovery(pg.get(), epoch_queued, reserved_pushes, handle);
+  osd->do_recovery(pg.get(), epoch_queued, reserved_pushes, priority, handle);
   pg->unlock();
 }
 
diff --git a/src/osd/scheduler/OpSchedulerItem.h b/src/osd/scheduler/OpSchedulerItem.h
index dff3acb608b812c7c657c21126180dd48b7b3576..2bc251c4633a45136a16ec02a81379325c866cda 100644
--- a/src/osd/scheduler/OpSchedulerItem.h
+++ b/src/osd/scheduler/OpSchedulerItem.h
@@ -502,14 +502,17 @@ class PGScrubChunkIsFree : public PGScrubItem {
 class PGRecovery : public PGOpQueueable {
   epoch_t epoch_queued;
   uint64_t reserved_pushes;
+  int priority;
 public:
   PGRecovery(
     spg_t pg,
     epoch_t epoch_queued,
-    uint64_t reserved_pushes)
+    uint64_t reserved_pushes,
+    int priority)
     : PGOpQueueable(pg),
       epoch_queued(epoch_queued),
-      reserved_pushes(reserved_pushes) {}
+      reserved_pushes(reserved_pushes),
+      priority(priority) {}
   std::ostream &print(std::ostream &rhs) const final {
     return rhs << "PGRecovery(pgid=" << get_pgid()
               << " epoch_queued=" << epoch_queued
@@ -522,18 +525,20 @@ public:
   void run(
     OSD *osd, OSDShard *sdata, PGRef& pg, ThreadPool::TPHandle &handle) final;
   op_scheduler_class get_scheduler_class() const final {
-    return op_scheduler_class::background_recovery;
+    return priority_to_scheduler_class(priority);
   }
 };
 
 class PGRecoveryContext : public PGOpQueueable {
   std::unique_ptr<GenContext<ThreadPool::TPHandle&>> c;
   epoch_t epoch;
+  int priority;
 public:
   PGRecoveryContext(spg_t pgid,
-                   GenContext<ThreadPool::TPHandle&> *c, epoch_t epoch)
+                   GenContext<ThreadPool::TPHandle&> *c, epoch_t epoch,
+                   int priority)
     : PGOpQueueable(pgid),
-      c(c), epoch(epoch) {}
+      c(c), epoch(epoch), priority(priority) {}
   std::ostream &print(std::ostream &rhs) const final {
     return rhs << "PGRecoveryContext(pgid=" << get_pgid()
               << " c=" << c.get() << " epoch=" << epoch
@@ -542,7 +547,7 @@ public:
   void run(
     OSD *osd, OSDShard *sdata, PGRef& pg, ThreadPool::TPHandle &handle) final;
   op_scheduler_class get_scheduler_class() const final {
-    return op_scheduler_class::background_recovery;
+    return priority_to_scheduler_class(priority);
   }
 };
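
Note: priority_to_scheduler_class() is a scheduler-side helper introduced elsewhere in this series. A minimal sketch of the mapping the get_scheduler_class() overrides above presumably rely on, assuming the degraded-priority threshold sketched earlier:

  // Assumed mapping, not verbatim source: recovery at or above the degraded
  // priority keeps the background_recovery class, while lower-priority
  // (best-effort) recovery drops to background_best_effort so that degraded
  // repair is serviced ahead of it.
  static op_scheduler_class priority_to_scheduler_class(int priority) {
    return priority >= recovery_msg_priority_t::DEGRADED
      ? op_scheduler_class::background_recovery
      : op_scheduler_class::background_best_effort;
  }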