git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: make retry_max and initial_delay configurable for aio submit_batch 60205/head
author: haoyixing <haoyixing@kuaishou.com>
Tue, 8 Oct 2024 12:38:11 +0000 (12:38 +0000)
committer: haoyixing <haoyixing@kuaishou.com>
Tue, 11 Feb 2025 02:10:54 +0000 (02:10 +0000)
The default aio submit retry settings (retry count and initial delay) are sometimes not enough, e.g. for high-density HDD OSDs under pressure.

Signed-off-by: haoyixing <haoyixing@kuaishou.com>
src/blk/aio/aio.cc
src/blk/aio/aio.h
src/blk/kernel/KernelDevice.cc
src/blk/kernel/io_uring.cc
src/blk/kernel/io_uring.h
src/common/options/global.yaml.in

index 1e6b102f3dc3bb4a1cbd11ee0743434845b7f112..21596f1584cd703c1a738a28f60b1ce419523726 100644 (file)
@@ -17,11 +17,11 @@ std::ostream& operator<<(std::ostream& os, const aio_t& aio)
 
 int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, 
                              void *priv,
-                             int *retries)
+                             int *retries, int submit_retries, int initial_delay_us)
 {
-  // 2^16 * 125us = ~8 seconds, so max sleep is ~16 seconds
-  int attempts = 16;
-  int delay = 125;
+  // 2^16 * 125us = ~8 seconds, so default max sleep is ~16 seconds
+  int attempts = submit_retries;
+  uint64_t delay = initial_delay_us;
   int r;
 
   aio_iter cur = begin;
@@ -74,8 +74,8 @@ int aio_queue_t::submit_batch(aio_iter begin, aio_iter end,
     }
     ceph_assert(r > 0);
     done += r;
-    attempts = 16;
-    delay = 125;
+    attempts = submit_retries;
+    delay = initial_delay_us;
     pushed = pulled = 0;
   }
   return done;
index cf21c416731634066cdea1ef7d3e01a9ad2b7713..f4f5bdde6710ac810cc3648767a95700916113aa 100644 (file)
@@ -101,7 +101,7 @@ struct io_queue_t {
   virtual int init(std::vector<int> &fds) = 0;
   virtual void shutdown() = 0;
   virtual int submit_batch(aio_iter begin, aio_iter end,
-                          void *priv, int *retries) = 0;
+                          void *priv, int *retries, int submit_retries, int initial_delay_us) = 0;
   virtual int get_next_completed(int timeout_ms, aio_t **paio, int max) = 0;
 };
 
@@ -154,6 +154,6 @@ struct aio_queue_t final : public io_queue_t {
   }
 
   int submit_batch(aio_iter begin, aio_iter end,
-                  void *priv, int *retries) final;
+                  void *priv, int *retries, int submit_retries, int initial_delay_us) final;
   int get_next_completed(int timeout_ms, aio_t **paio, int max) final;
 };
index 72921e6d9f08bf2fa9317a0e78547ec1a581207c..12a4af2bd577442ffd2f988541290fb9ed2a643b 100644 (file)
@@ -944,9 +944,15 @@ void KernelDevice::aio_submit(IOContext *ioc)
   }
 
   void *priv = static_cast<void*>(ioc);
+  int retry_max = cct->_conf->bdev_aio_submit_retry_max;
+  int initial_delay_us = cct->_conf->bdev_aio_submit_retry_initial_delay_us;
+  dout(20) << __func__
+          << " bdev_aio_submit_retry_max " << retry_max
+          << " bdev_aio_submit_retry_initial_delay_us " << initial_delay_us
+          << dendl;
   int r, retries = 0;
   r = io_queue->submit_batch(ioc->running_aios.begin(), e,
-                            priv, &retries);
+                            priv, &retries, retry_max, initial_delay_us);
 
   if (retries)
     derr << __func__ << " retries " << retries << dendl;
index be63d63aaf264bcc8687c29090aea5897ed1c6be..9cd06e86e15aa91a0b97f32c0819ae7ad2fc96dc 100644 (file)
@@ -177,7 +177,7 @@ void ioring_queue_t::shutdown()
 
 int ioring_queue_t::submit_batch(aio_iter beg, aio_iter end,
                                  void *priv,
-                                 int *retries)
+                                 int *retries, int submit_retries, int initial_delay_us)
 {
   (void)retries;
 
@@ -245,7 +245,7 @@ void ioring_queue_t::shutdown()
 
 int ioring_queue_t::submit_batch(aio_iter beg, aio_iter end,
                                  void *priv,
-                                 int *retries)
+                                 int *retries, int submit_retries, int initial_delay_us)
 {
   ceph_assert(0);
 }
index dd8f874728d76ba52fb1230b3c382d1d7f6f935a..918328d6cb891bd728a6b53fc91642f427096e6a 100644 (file)
@@ -28,6 +28,6 @@ struct ioring_queue_t final : public io_queue_t {
   void shutdown() final;
 
   int submit_batch(aio_iter begin, aio_iter end,
-                   void *priv, int *retries) final;
+                   void *priv, int *retries, int submit_retries, int initial_delay_us) final;
   int get_next_completed(int timeout_ms, aio_t **paio, int max) final;
 };
index 4b7fdf30f81c5413511a502964ccf8e4a0d2690b..4913c775c1f0af5bf8f7d31344aae152274e0e72 100644 (file)
@@ -4048,6 +4048,16 @@ options:
   level: advanced
   default: 16
   with_legacy: true
+- name: bdev_aio_submit_retry_max
+  type: int
+  level: advanced
+  default: 16
+  with_legacy: true
+- name: bdev_aio_submit_retry_initial_delay_us
+  type: int
+  level: advanced
+  default: 125
+  with_legacy: true
 - name: bdev_block_size
   type: size
   level: advanced