client: switch to use 32 bits ext_num_fwd

author Xiubo Li <xiubli@redhat.com>

Tue, 5 Jul 2022 04:59:11 +0000 (12:59 +0800)

committer Xiubo Li <xiubli@redhat.com>

Thu, 16 Feb 2023 00:49:41 +0000 (08:49 +0800)
author Xiubo Li <xiubli@redhat.com>
Tue, 5 Jul 2022 04:59:11 +0000 (12:59 +0800)
committer Xiubo Li <xiubli@redhat.com>
Thu, 16 Feb 2023 00:49:41 +0000 (08:49 +0800)
diff --git a/src/client/Client.cc b/src/client/Client.cc

index 9a182ddf10ae9ce506d836a7d87ce8a2934ff66b..dd06dfcad2d9057338a6416bb5747163d8de514a 100644 (file)
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -2578,7 +2578,7 @@ ref_t<MClientRequest> Client::build_client_request(MetaRequest *request, mds_ran
    req->fscrypt_auth = request->fscrypt_auth;
    req->fscrypt_file = request->fscrypt_file;
    req->set_retry_attempt(request->retry_attempt++);
-  req->head.num_fwd = request->num_fwd;
+  req->head.ext_num_fwd = request->num_fwd;
    const gid_t *_gids;
    int gid_count = request->perms.get_gids(&_gids);
    req->set_gid_list(gid_count, _gids);
@@ -2607,32 +2607,20 @@ void Client::handle_client_request_forward(const MConstRef<MClientRequestForward
    ceph_assert(request);
  
    /*
-   * The type of 'num_fwd' in ceph 'MClientRequestForward'
-   * is 'int32_t', while in 'ceph_mds_request_head' the
-   * type is '__u8'. So in case the request bounces between
-   * MDSes exceeding 256 times, the client will get stuck.
-   *
-   * In this case it's ususally a bug in MDS and continue
-   * bouncing the request makes no sense.
+   * Avoid infinite retrying after overflow.
     *
-   * In future this could be fixed in ceph code, so avoid
-   * using the hardcode here.
+   * The MDS increases the fwd count on each forward. On the
+   * client side, if the received num_fwd does not exceed the
+   * one saved in the request, the MDS is an old version whose
+   * 8-bit counter has overflowed.
     */
-  int max_fwd = sizeof(((struct ceph_mds_request_head*)0)->num_fwd);
-  max_fwd = (1 << (max_fwd * CHAR_BIT)) - 1;
    auto num_fwd = fwd->get_num_fwd();
-  if (num_fwd <= request->num_fwd || num_fwd >= max_fwd) {
-    if (request->num_fwd >= max_fwd || num_fwd >= max_fwd) {
-      request->abort(-CEPHFS_EMULTIHOP);
-      request->caller_cond->notify_all();
-      ldout(cct, 1) << __func__ << " tid " << tid << " seq overflow"
-                    << ", abort it" << dendl;
-    } else {
-      ldout(cct, 10) << __func__ << " tid " << tid
-                     << " old fwd seq " << fwd->get_num_fwd()
-                     << " <= req fwd " << request->num_fwd
-                     << ", ignore it" << dendl;
-    }
+  if (num_fwd <= request->num_fwd || (uint32_t)num_fwd >= UINT32_MAX) {
+    request->abort(-CEPHFS_EMULTIHOP);
+    request->caller_cond->notify_all();
+    ldout(cct, 0) << __func__ << " request tid " << tid << " new num_fwd "
+      << num_fwd << " old num_fwd " << request->num_fwd << ", fwd seq overflow"
+      << ", abort it" << dendl;
      return;
    }
author	Xiubo Li <xiubli@redhat.com>
	Tue, 5 Jul 2022 04:59:11 +0000 (12:59 +0800)
committer	Xiubo Li <xiubli@redhat.com>
	Thu, 16 Feb 2023 00:49:41 +0000 (08:49 +0800)