osd: Update PGSnapTrim op queue item cost to reflect average object size

author Sridhar Seshasayee <sseshasa@redhat.com>

Mon, 20 Nov 2023 13:17:14 +0000 (18:47 +0530)

committer Sridhar Seshasayee <sseshasa@redhat.com>

Tue, 2 Jan 2024 07:44:21 +0000 (13:14 +0530)
author Sridhar Seshasayee <sseshasa@redhat.com>
Mon, 20 Nov 2023 13:17:14 +0000 (18:47 +0530)
committer Sridhar Seshasayee <sseshasa@redhat.com>
Tue, 2 Jan 2024 07:44:21 +0000 (13:14 +0530)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

index c61e7d33218abaa74b5ea02d736d03ca0217c4b7..3d2e32ce0ec4ff9fa73b700c12ea0cf085d3ec6b 100644 (file)
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1726,14 +1726,32 @@ void OSDService::queue_recovery_context(
        e));
  }
  
-void OSDService::queue_for_snap_trim(PG *pg)
+void OSDService::queue_for_snap_trim(PG *pg, uint64_t cost_per_object)
  {
    dout(10) << "queueing " << *pg << " for snaptrim" << dendl;
+  uint64_t cost_for_queue = [this, cost_per_object] {
+    if (cct->_conf->osd_op_queue == "mclock_scheduler") {
+      /* The cost calculation is valid for most snap trim iterations except
+       * for the following cases:
+       * 1) The penultimate iteration which may return 1 object to trim, in
+       *    which case the cost will be off by a factor equivalent to the
+       *    average object size, and,
+       * 2) The final iteration which returns -ENOENT and performs clean-ups.
+       */
+      return cost_per_object * cct->_conf->osd_pg_max_concurrent_snap_trims;
+    } else {
+      /* We retain this legacy behavior for WeightedPriorityQueue.
+       * This branch should be removed after Squid.
+       */
+      return cct->_conf->osd_snap_trim_cost;
+    }
+  }();
+
    enqueue_back(
      OpSchedulerItem(
        unique_ptr<OpSchedulerItem::OpQueueable>(
         new PGSnapTrim(pg->get_pgid(), pg->get_osdmap_epoch())),
-      cct->_conf->osd_snap_trim_cost,
+      cost_for_queue,
        cct->_conf->osd_snap_trim_priority,
        ceph_clock_now(),
        0,
diff --git a/src/osd/OSD.h b/src/osd/OSD.h

index 00fab7ec83ed6947eb9a56ff54471fcfb3b6d368..2db3bc87f3deb22de08a13500b85db37a6d08601 100644 (file)
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -511,7 +511,7 @@ public:
                                GenContext<ThreadPool::TPHandle&> *c,
                                uint64_t cost,
                               int priority);
-  void queue_for_snap_trim(PG *pg);
+  void queue_for_snap_trim(PG *pg, uint64_t cost);
    void queue_for_scrub(PG* pg, Scrub::scrub_prio_t with_priority);
  
    void queue_scrub_after_repair(PG* pg, Scrub::scrub_prio_t with_priority);
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc

index 075b2f3ef18d88270e51b431427d463dbdc3e2ad..d37f07745ed90c7de80e24cf439ac578d41adf33 100644 (file)
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -15627,8 +15627,10 @@ PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx)
      NamedState(nullptr, "Trimming/AwaitAsyncWork")
  {
    auto *pg = context< SnapTrimmer >().pg;
+  // Determine cost in terms of the average object size
+  uint64_t cost_per_object = pg->get_average_object_size();
    context< SnapTrimmer >().log_enter(state_name);
-  context< SnapTrimmer >().pg->osd->queue_for_snap_trim(pg);
+  context< SnapTrimmer >().pg->osd->queue_for_snap_trim(pg, cost_per_object);
    pg->state_set(PG_STATE_SNAPTRIM);
    pg->state_clear(PG_STATE_SNAPTRIM_ERROR);
    pg->publish_stats_to_osd();
author	Sridhar Seshasayee <sseshasa@redhat.com>
	Mon, 20 Nov 2023 13:17:14 +0000 (18:47 +0530)
committer	Sridhar Seshasayee <sseshasa@redhat.com>
	Tue, 2 Jan 2024 07:44:21 +0000 (13:14 +0530)
src/osd/OSD.cc		patch | blob | history
src/osd/OSD.h		patch | blob | history
src/osd/PrimaryLogPG.cc		patch | blob | history