From: liangmingyuan Date: Mon, 5 Aug 2024 07:30:33 +0000 (+0800) Subject: rgw/beast: optimize for accept when meeting error in listening X-Git-Tag: v19.2.1~120^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F60244%2Fhead;p=ceph.git rgw/beast: optimize for accept when meeting error in listening It is not appropriate to stop accepting sockets whenever a previous socket listen or accept call hits an error. Doing so causes radosgw to stop working after an occasional failure. For example, a "Too many open files" error may occur at high IOPS (or just after a reshard, when the number of open sockets may increase because operations are blocked). Signed-off-by: Mingyuan Liang (cherry picked from commit a7090783cf820045633c66ca04926cb3f2b5a4aa) --- diff --git a/src/rgw/rgw_asio_frontend.cc b/src/rgw/rgw_asio_frontend.cc index 86029e7f634e0..b8bcff3a06fa3 100644 --- a/src/rgw/rgw_asio_frontend.cc +++ b/src/rgw/rgw_asio_frontend.cc @@ -67,6 +67,44 @@ auto make_stack_allocator() { return boost::context::protected_fixedsize_stack{512*1024}; } +static constexpr std::chrono::milliseconds BACKOFF_MAX_WAIT(5000); + +class RGWAsioBackoff { + using Clock = ceph::coarse_mono_clock; + using Timer = boost::asio::basic_waitable_timer; + Timer timer; + + ceph::timespan cur_wait; + void update_wait_time(); +public: + explicit RGWAsioBackoff(boost::asio::io_context& context) : + timer(context), + cur_wait(std::chrono::milliseconds(1)) { + } + + void backoff_sleep(boost::asio::yield_context yield); + void reset() { + cur_wait = std::chrono::milliseconds(1); + } +}; + +void RGWAsioBackoff::update_wait_time() +{ + if (cur_wait < BACKOFF_MAX_WAIT) { + cur_wait = cur_wait * 2; + } + if (cur_wait > BACKOFF_MAX_WAIT) { + cur_wait = BACKOFF_MAX_WAIT; + } +} + +void RGWAsioBackoff::backoff_sleep(boost::asio::yield_context yield) +{ + update_wait_time(); + timer.expires_after(cur_wait); + timer.async_wait(yield); +} + using namespace std; template @@ -440,6 +478,7 @@ class AsioFrontend { std::atomic 
going_down{false}; + RGWAsioBackoff backoff; CephContext* ctx() const { return cct.get(); } std::optional client_counters; std::unique_ptr client_config; @@ -452,7 +491,8 @@ class AsioFrontend { dmc::SchedulerCtx& sched_ctx, boost::asio::io_context& context) : env(env), conf(conf), context(context), - pause_mutex(context.get_executor()) + pause_mutex(context.get_executor()), + backoff(context) { auto sched_t = dmc::get_scheduler_t(ctx()); switch(sched_t){ @@ -1024,9 +1064,19 @@ void AsioFrontend::accept(Listener& l, boost::asio::yield_context yield) return; } else if (ec) { ldout(ctx(), 1) << "accept failed: " << ec.message() << dendl; + if (ec == boost::system::errc::too_many_files_open || + ec == boost::system::errc::too_many_files_open_in_system || + ec == boost::system::errc::no_buffer_space || + ec == boost::system::errc::not_enough_memory) { + // always retry accept() if we hit a resource limit + backoff.backoff_sleep(yield); + continue; + } + ldout(ctx(), 0) << "accept stopped due to error: " << ec.message() << dendl; return; } + backoff.reset(); on_accept(l, std::move(l.socket)); } }