From: haoyixing Date: Tue, 8 Oct 2024 12:38:11 +0000 (+0000) Subject: os/bluestore: make retry_max and initial_delay configurable for aio submit_batch X-Git-Tag: v20.0.0~122^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3667f4ab795c5a3630aa423e66b133b4967d2afe;p=ceph.git os/bluestore: make retry_max and initial_delay configurable for aio submit_batch The default aio submit retry settings (retry count and initial delay) are sometimes not enough, e.g. for high-density HDD OSDs under pressure. Signed-off-by: haoyixing --- diff --git a/src/blk/aio/aio.cc b/src/blk/aio/aio.cc index 1e6b102f3dc3..21596f1584cd 100644 --- a/src/blk/aio/aio.cc +++ b/src/blk/aio/aio.cc @@ -17,11 +17,11 @@ std::ostream& operator<<(std::ostream& os, const aio_t& aio) int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, void *priv, - int *retries) + int *retries, int submit_retries, int initial_delay_us) { - // 2^16 * 125us = ~8 seconds, so max sleep is ~16 seconds - int attempts = 16; - int delay = 125; + // 2^16 * 125us = ~8 seconds, so default max sleep is ~16 seconds + int attempts = submit_retries; + uint64_t delay = initial_delay_us; int r; aio_iter cur = begin; @@ -74,8 +74,8 @@ int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, } ceph_assert(r > 0); done += r; - attempts = 16; - delay = 125; + attempts = submit_retries; + delay = initial_delay_us; pushed = pulled = 0; } return done; diff --git a/src/blk/aio/aio.h b/src/blk/aio/aio.h index cf21c4167316..f4f5bdde6710 100644 --- a/src/blk/aio/aio.h +++ b/src/blk/aio/aio.h @@ -101,7 +101,7 @@ struct io_queue_t { virtual int init(std::vector<int> &fds) = 0; virtual void shutdown() = 0; virtual int submit_batch(aio_iter begin, aio_iter end, - void *priv, int *retries) = 0; + void *priv, int *retries, int submit_retries, int initial_delay_us) = 0; virtual int get_next_completed(int timeout_ms, aio_t **paio, int max) = 0; }; @@ -154,6 +154,6 @@ struct aio_queue_t final : public io_queue_t { } int submit_batch(aio_iter begin, 
aio_iter end, - void *priv, int *retries) final; + void *priv, int *retries, int submit_retries, int initial_delay_us) final; int get_next_completed(int timeout_ms, aio_t **paio, int max) final; }; diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 72921e6d9f08..12a4af2bd577 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -944,9 +944,15 @@ void KernelDevice::aio_submit(IOContext *ioc) } void *priv = static_cast<void*>(ioc); + int retry_max = cct->_conf->bdev_aio_submit_retry_max; + int initial_delay_us = cct->_conf->bdev_aio_submit_retry_initial_delay_us; + dout(20) << __func__ + << " bdev_aio_submit_retry_max " << retry_max + << " bdev_aio_submit_retry_initial_delay_us " << initial_delay_us + << dendl; int r, retries = 0; r = io_queue->submit_batch(ioc->running_aios.begin(), e, - priv, &retries); + priv, &retries, retry_max, initial_delay_us); if (retries) derr << __func__ << " retries " << retries << dendl; diff --git a/src/blk/kernel/io_uring.cc b/src/blk/kernel/io_uring.cc index be63d63aaf26..9cd06e86e15a 100644 --- a/src/blk/kernel/io_uring.cc +++ b/src/blk/kernel/io_uring.cc @@ -177,7 +177,7 @@ void ioring_queue_t::shutdown() int ioring_queue_t::submit_batch(aio_iter beg, aio_iter end, void *priv, - int *retries) + int *retries, int submit_retries, int initial_delay_us) { (void)retries; @@ -245,7 +245,7 @@ void ioring_queue_t::shutdown() int ioring_queue_t::submit_batch(aio_iter beg, aio_iter end, void *priv, - int *retries) + int *retries, int submit_retries, int initial_delay_us) { ceph_assert(0); } diff --git a/src/blk/kernel/io_uring.h b/src/blk/kernel/io_uring.h index dd8f874728d7..918328d6cb89 100644 --- a/src/blk/kernel/io_uring.h +++ b/src/blk/kernel/io_uring.h @@ -28,6 +28,6 @@ struct ioring_queue_t final : public io_queue_t { void shutdown() final; int submit_batch(aio_iter begin, aio_iter end, - void *priv, int *retries) final; + void *priv, int *retries, int submit_retries, int 
initial_delay_us) final; int get_next_completed(int timeout_ms, aio_t **paio, int max) final; }; diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in index 4b7fdf30f81c..4913c775c1f0 100644 --- a/src/common/options/global.yaml.in +++ b/src/common/options/global.yaml.in @@ -4048,6 +4048,16 @@ options: level: advanced default: 16 with_legacy: true +- name: bdev_aio_submit_retry_max + type: int + level: advanced + default: 16 + with_legacy: true +- name: bdev_aio_submit_retry_initial_delay_us + type: int + level: advanced + default: 125 + with_legacy: true - name: bdev_block_size type: size level: advanced