From 8a50329d56c22aeb67793f75db1fba204239b670 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 31 Mar 2022 15:16:49 +0800 Subject: [PATCH] client: stop retrying the request when exceeding 256 times The type of 'retry_attempt' in 'MetaRequest' is 'int', while in 'ceph_mds_request_head' the type of 'num_retry' is '__u8'. So in case the request retries exceeding 256 times, the MDS will receive a incorrect retry seq. In this case it's ususally a bug in MDS and continue retrying the request makes no sense. For now let's limit it to 256. In future this could be fixed in ceph code, so avoid using the hardcode here. Fixes: https://tracker.ceph.com/issues/55144 Signed-off-by: Xiubo Li --- src/client/Client.cc | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/client/Client.cc b/src/client/Client.cc index 7afb41c9e2ad..c35a53521e81 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2457,6 +2457,9 @@ void Client::send_request(MetaRequest *request, MetaSession *session, ldout(cct, 10) << __func__ << " rebuilding request " << request->get_tid() << " for mds." << mds << dendl; auto r = build_client_request(request); + if (!r) + return; + if (request->dentry()) { r->set_dentry_wanted(); } @@ -2500,6 +2503,28 @@ void Client::send_request(MetaRequest *request, MetaSession *session, ref_t Client::build_client_request(MetaRequest *request) { + /* + * The type of 'retry_attempt' in 'MetaRequest' is 'int', + * while in 'ceph_mds_request_head' the type of 'num_retry' + * is '__u8'. So in case the request retries exceeding 256 + * times, the MDS will receive a incorrect retry seq. + * + * In this case it's ususally a bug in MDS and continue + * retrying the request makes no sense. + * + * In future this could be fixed in ceph code, so avoid + * using the hardcode here. + */ + int max_retry = sizeof(((struct ceph_mds_request_head*)0)->num_retry); + max_retry = 1 << (max_retry * CHAR_BIT); + if (request->retry_attempt >= max_retry) { + request->abort(-EMULTIHOP); + request->caller_cond->notify_all(); + ldout(cct, 1) << __func__ << " request tid " << request->tid + << " seq overflow" << ", abort it" << dendl; + return nullptr; + } + auto req = make_message(request->get_op()); req->set_tid(request->tid); req->set_stamp(request->op_stamp); -- 2.47.3