From: Xiubo Li Date: Thu, 31 Mar 2022 07:16:49 +0000 (+0800) Subject: client: stop retrying the request when exceeding 256 times X-Git-Tag: v18.0.0~827^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=8a50329d56c22aeb67793f75db1fba204239b670;p=ceph.git client: stop retrying the request when exceeding 256 times The type of 'retry_attempt' in 'MetaRequest' is 'int', while in 'ceph_mds_request_head' the type of 'num_retry' is '__u8'. So in case the request retries exceeding 256 times, the MDS will receive a incorrect retry seq. In this case it's ususally a bug in MDS and continue retrying the request makes no sense. For now let's limit it to 256. In future this could be fixed in ceph code, so avoid using the hardcode here. Fixes: https://tracker.ceph.com/issues/55144 Signed-off-by: Xiubo Li --- diff --git a/src/client/Client.cc b/src/client/Client.cc index 7afb41c9e2ad9..c35a53521e813 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2457,6 +2457,9 @@ void Client::send_request(MetaRequest *request, MetaSession *session, ldout(cct, 10) << __func__ << " rebuilding request " << request->get_tid() << " for mds." << mds << dendl; auto r = build_client_request(request); + if (!r) + return; + if (request->dentry()) { r->set_dentry_wanted(); } @@ -2500,6 +2503,28 @@ void Client::send_request(MetaRequest *request, MetaSession *session, ref_t Client::build_client_request(MetaRequest *request) { + /* + * The type of 'retry_attempt' in 'MetaRequest' is 'int', + * while in 'ceph_mds_request_head' the type of 'num_retry' + * is '__u8'. So in case the request retries exceeding 256 + * times, the MDS will receive a incorrect retry seq. + * + * In this case it's ususally a bug in MDS and continue + * retrying the request makes no sense. + * + * In future this could be fixed in ceph code, so avoid + * using the hardcode here. + */ + int max_retry = sizeof(((struct ceph_mds_request_head*)0)->num_retry); + max_retry = 1 << (max_retry * CHAR_BIT); + if (request->retry_attempt >= max_retry) { + request->abort(-EMULTIHOP); + request->caller_cond->notify_all(); + ldout(cct, 1) << __func__ << " request tid " << request->tid + << " seq overflow" << ", abort it" << dendl; + return nullptr; + } + auto req = make_message(request->get_op()); req->set_tid(request->tid); req->set_stamp(request->op_stamp);