From: Robin Geuze Date: Tue, 23 Nov 2021 14:16:36 +0000 (+0100) Subject: osd: Remove aios_size argument from submit_batch X-Git-Tag: v18.2.5~234^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ae75fa86a52addfc50025922cc5da4ad882526c4;p=ceph.git osd: Remove aios_size argument from submit_batch Due to aios_size being a uint16 and the source value for the actual call being an int there was a possible overflow. This was "fixed" with an assert, however that still causes a crash. This commit removes the need for aios_size completely by iterating over the list and submitting it in max_iodepth batches. Fixes: https://tracker.ceph.com/issues/46366 Signed-off-by: Robin Geuze (cherry picked from commit f87db49b0013088f1c87802886c4c16ce47c5cc2) (cherry picked from commit 7b52409ed70a0c9e382a0fc76a1d7a508aeaf27d) --- diff --git a/src/blk/aio/aio.cc b/src/blk/aio/aio.cc index 00a12bfd16af..e550165a9605 100644 --- a/src/blk/aio/aio.cc +++ b/src/blk/aio/aio.cc @@ -16,7 +16,7 @@ std::ostream& operator<<(std::ostream& os, const aio_t& aio) } int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, - uint16_t aios_size, void *priv, + void *priv, int *retries) { // 2^16 * 125us = ~8 seconds, so max sleep is ~16 seconds @@ -25,33 +25,36 @@ int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, int r; aio_iter cur = begin; - struct aio_t *piocb[aios_size]; - int left = 0; - while (cur != end) { - cur->priv = priv; - *(piocb+left) = &(*cur); - ++left; - ++cur; - } - ceph_assert(aios_size >= left); +#if defined(HAVE_LIBAIO) + struct aio_t *piocb[max_iodepth]; +#endif int done = 0; - while (left > 0) { + while (cur != end) { #if defined(HAVE_LIBAIO) - r = io_submit(ctx, std::min(left, max_iodepth), (struct iocb**)(piocb + done)); + int itemCount = 0; + while (cur != end && itemCount < max_iodepth) { + cur->priv = priv; + piocb[itemCount] = &(*cur); + ++itemCount; + ++cur; + } + r = io_submit(ctx, itemCount, (struct iocb**)piocb); #elif defined(HAVE_POSIXAIO) - if (piocb[done]->n_aiocb == 1) { + cur->priv = priv + if ((cur->n_aiocb == 1) { // TODO: consider batching multiple reads together with lio_listio - piocb[done]->aio.aiocb.aio_sigevent.sigev_notify = SIGEV_KEVENT; - piocb[done]->aio.aiocb.aio_sigevent.sigev_notify_kqueue = ctx; - piocb[done]->aio.aiocb.aio_sigevent.sigev_value.sival_ptr = piocb[done]; - r = aio_read(&piocb[done]->aio.aiocb); + cur->aio.aiocb.aio_sigevent.sigev_notify = SIGEV_KEVENT; + cur->aio.aiocb.aio_sigevent.sigev_notify_kqueue = ctx; + cur->aio.aiocb.aio_sigevent.sigev_value.sival_ptr = &(*cur); + r = aio_read(&cur->aio.aiocb); } else { struct sigevent sev; sev.sigev_notify = SIGEV_KEVENT; sev.sigev_notify_kqueue = ctx; - sev.sigev_value.sival_ptr = piocb[done]; - r = lio_listio(LIO_NOWAIT, &piocb[done]->aio.aiocbp, piocb[done]->n_aiocb, &sev); + sev.sigev_value.sival_ptr = &(*cur); + r = lio_listio(LIO_NOWAIT, &cur->aio.aiocbp, cur->n_aiocb, &sev); } + ++cur; #endif if (r < 0) { if (r == -EAGAIN && attempts-- > 0) { @@ -64,7 +67,6 @@ int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, } ceph_assert(r > 0); done += r; - left -= r; attempts = 16; delay = 125; } diff --git a/src/blk/aio/aio.h b/src/blk/aio/aio.h index 14b89784bc1b..0d8211e7858a 100644 --- a/src/blk/aio/aio.h +++ b/src/blk/aio/aio.h @@ -100,7 +100,7 @@ struct io_queue_t { virtual int init(std::vector &fds) = 0; virtual void shutdown() = 0; - virtual int submit_batch(aio_iter begin, aio_iter end, uint16_t aios_size, + virtual int submit_batch(aio_iter begin, aio_iter end, void *priv, int *retries) = 0; virtual int get_next_completed(int timeout_ms, aio_t **paio, int max) = 0; }; @@ -153,7 +153,7 @@ struct aio_queue_t final : public io_queue_t { } } - int submit_batch(aio_iter begin, aio_iter end, uint16_t aios_size, + int submit_batch(aio_iter begin, aio_iter end, void *priv, int *retries) final; int get_next_completed(int timeout_ms, aio_t **paio, int max) final; }; diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 31a714a7c8c6..ebc3ec55d9f3 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -866,10 +866,8 @@ void KernelDevice::aio_submit(IOContext *ioc) void *priv = static_cast(ioc); int r, retries = 0; - // num of pending aios should not overflow when passed to submit_batch() - ceph_assert(pending <= std::numeric_limits::max()); r = io_queue->submit_batch(ioc->running_aios.begin(), e, - pending, priv, &retries); + priv, &retries); if (retries) derr << __func__ << " retries " << retries << dendl; diff --git a/src/blk/kernel/io_uring.cc b/src/blk/kernel/io_uring.cc index 5e7fd1227045..b33da80ed8ed 100644 --- a/src/blk/kernel/io_uring.cc +++ b/src/blk/kernel/io_uring.cc @@ -176,10 +176,9 @@ void ioring_queue_t::shutdown() } int ioring_queue_t::submit_batch(aio_iter beg, aio_iter end, - uint16_t aios_size, void *priv, + void *priv, int *retries) { - (void)aios_size; (void)retries; pthread_mutex_lock(&d->sq_mutex); diff --git a/src/blk/kernel/io_uring.h b/src/blk/kernel/io_uring.h index e7d0acde0134..dd8f874728d7 100644 --- a/src/blk/kernel/io_uring.h +++ b/src/blk/kernel/io_uring.h @@ -27,7 +27,7 @@ struct ioring_queue_t final : public io_queue_t { int init(std::vector &fds) final; void shutdown() final; - int submit_batch(aio_iter begin, aio_iter end, uint16_t aios_size, + int submit_batch(aio_iter begin, aio_iter end, void *priv, int *retries) final; int get_next_completed(int timeout_ms, aio_t **paio, int max) final; };