git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-client.git/commitdiff
ceph: clear `s_cap_reconnect` when ceph_pagelist_encode_32() fails
author: Max Kellermann <max.kellermann@ionos.com>
Mon, 30 Mar 2026 08:43:19 +0000 (10:43 +0200)
committer: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Thu, 2 Apr 2026 18:20:36 +0000 (11:20 -0700)
This MDS reconnect error path leaves s_cap_reconnect set.
send_mds_reconnect() sets the bit at the beginning of the reconnect,
but the first failing operation after that, ceph_pagelist_encode_32(),
can jump to `fail:` without clearing it.

__ceph_remove_cap() consults that flag to decide whether cap releases
should be queued. A reconnect-preparation failure therefore leaves the
session in reconnect mode from the cap-release path's point of view
and can strand release work until some later state transition repairs
it.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Tested-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
fs/ceph/mds_client.c

index effd4dbf24a1e63900b6c1be6311dc0082703091..12d5c4bded4362bbffe8bb8bb4e29c8b7f8c5983 100644 (file)
@@ -5003,7 +5003,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
        /* placeholder for nr_caps */
        err = ceph_pagelist_encode_32(recon_state.pagelist, 0);
        if (err)
-               goto fail;
+               goto fail_clear_cap_reconnect;
 
        if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) {
                recon_state.msg_version = 3;
@@ -5093,6 +5093,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
        ceph_pagelist_release(recon_state.pagelist);
        return;
 
+fail_clear_cap_reconnect:
+       spin_lock(&session->s_cap_lock);
+       session->s_cap_reconnect = 0;
+       spin_unlock(&session->s_cap_lock);
 fail:
        ceph_msg_put(reply);
        up_read(&mdsc->snap_rwsem);