From: Trond Myklebust
Date: Mon, 5 Jan 2026 21:43:21 +0000 (-0500)
Subject: NFSv4/pnfs: If the server is down, retry the layout returns on reboot
X-Git-Tag: ceph-for-7.1-rc4~102^2~20
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1805e6b2f49fbf63322a629a36019cbe2c6628e3;p=ceph-client.git

NFSv4/pnfs: If the server is down, retry the layout returns on reboot

If a layout return is embedded in a CLOSE or DELEGRETURN rpc call, and
the metadata server reboots, the expectation now is that the client
should resend the layout return once the server comes back up.

This patch changes the current behaviour of dropping the layouts on the
floor, and instead queues them up for retrying.

Signed-off-by: Trond Myklebust
---

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 91bcf67bd743..768de9935ff1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9769,16 +9769,26 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
 	if (!nfs41_sequence_process(task, &lrp->res.seq_res))
 		return;
 
-	if (task->tk_rpc_status == -ETIMEDOUT) {
-		lrp->rpc_status = -EAGAIN;
-		lrp->res.lrs_present = 0;
-		return;
-	}
-	/*
-	 * Was there an RPC level error? Assume the call succeeded,
-	 * and that we need to release the layout
-	 */
-	if (task->tk_rpc_status != 0 && RPC_WAS_SENT(task)) {
+	if (task->tk_rpc_status < 0) {
+		switch (task->tk_rpc_status) {
+		case -EACCES:
+		case -EIO:
+		case -EKEYEXPIRED:
+		case -ERESTARTSYS:
+		case -EINTR:
+			lrp->rpc_status = 0;
+			break;
+		case -ENETDOWN:
+		case -ENETUNREACH:
+			if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL)
+				lrp->rpc_status = 0;
+			else
+				lrp->rpc_status = -EAGAIN;
+			break;
+		default:
+			lrp->rpc_status = -EAGAIN;
+			break;
+		}
 		lrp->res.lrs_present = 0;
 		return;
 	}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bc13d1e69449..e89e476070a1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1698,11 +1698,23 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
 		/* If the call was not sent, let caller handle it */
 		if (!RPC_WAS_SENT(task))
 			return 0;
-		/*
-		 * Otherwise, assume the call succeeded and
-		 * that we need to release the layout
-		 */
-		*ret = 0;
+		switch (task->tk_rpc_status) {
+		default:
+			/*
+			 * Defer the layoutreturn if it was due
+			 * to the server being down.
+			 */
+			*ret = -NFS4ERR_NOMATCHING_LAYOUT;
+			break;
+		case -EACCES:
+		case -EIO:
+		case -EKEYEXPIRED:
+		case -ERESTARTSYS:
+		case -EINTR:
+			/* Don't retry */
+			*ret = 0;
+			break;
+		}
 		(*respp)->lrs_present = 0;
 		retval = 0;
 		break;