From a49a2a1baa0c553c3548a1c414b6a3c005a8deba Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 22 Nov 2025 12:00:36 +1100 Subject: [PATCH 1/4] lockd: fix vfs_test_lock() calls Usage of vfs_test_lock() is somewhat confused. Documentation suggests it is given a "lock" but this is not the case. It is given a struct file_lock which contains some details of the sort of lock it should be looking for. In particular passing a "file_lock" containing fl_lmops or fl_ops is meaningless and possibly confusing. This is particularly problematic in lockd. nlmsvc_testlock() receives an initialised "file_lock" from xdr-decode, including manager ops and an owner. It then mistakenly passes this to vfs_test_lock() which might replace the owner and the ops. This can lead to confusion when freeing the lock. The primary role of the 'struct file_lock' passed to vfs_test_lock() is to report a conflicting lock that was found, so it makes more sense for nlmsvc_testlock() to pass "conflock", which it uses for returning the conflicting lock. With this change, freeing of the lock is not confused and code in __nlm4svc_proc_test() and __nlmsvc_proc_test() can be simplified. Documentation for vfs_test_lock() is improved to reflect its real purpose, and a WARN_ON_ONCE() is added to avoid a similar problem in the future. Reported-by: Olga Kornievskaia Closes: https://lore.kernel.org/all/20251021130506.45065-1-okorniev@redhat.com Signed-off-by: NeilBrown Fixes: 20fa19027286 ("nfs: add export operations") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/svc4proc.c | 4 +--- fs/lockd/svclock.c | 21 ++++++++++++--------- fs/lockd/svcproc.c | 5 +---- fs/locks.c | 12 ++++++++++-- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 109e5caae8c7..4b6f18d97734 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -97,7 +97,6 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; - struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST4 called\n"); @@ -107,7 +106,6 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; - test_owner = argp->lock.fl.c.flc_owner; /* Now check for conflicting locks */ resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock); @@ -116,7 +114,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); - nlmsvc_put_lockowner(test_owner); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 3a3d05cfe09a..6bce19fd024c 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -633,7 +633,13 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, } mode = lock_to_openmode(&lock->fl); - error = vfs_test_lock(file->f_file[mode], &lock->fl); + locks_init_lock(&conflock->fl); + /* vfs_test_lock only uses start, end, and owner, but tests flc_file */ + conflock->fl.c.flc_file = lock->fl.c.flc_file; + conflock->fl.fl_start = lock->fl.fl_start; + conflock->fl.fl_end = lock->fl.fl_end; + conflock->fl.c.flc_owner = lock->fl.c.flc_owner; + error = vfs_test_lock(file->f_file[mode], &conflock->fl); if (error) { /* We can't currently deal with deferred test requests */ if (error == FILE_LOCK_DEFERRED) @@ -643,22 +649,19 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - if (lock->fl.c.flc_type == F_UNLCK) { + if (conflock->fl.c.flc_type == F_UNLCK) { ret = nlm_granted; goto out; } dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n", - lock->fl.c.flc_type, (long long)lock->fl.fl_start, - (long long)lock->fl.fl_end); + conflock->fl.c.flc_type, (long long)conflock->fl.fl_start, + (long long)conflock->fl.fl_end); conflock->caller = "somehost"; /* FIXME */ conflock->len = strlen(conflock->caller); conflock->oh.len = 0; /* don't return OH info */ - conflock->svid = lock->fl.c.flc_pid; - conflock->fl.c.flc_type = lock->fl.c.flc_type; - conflock->fl.fl_start = lock->fl.fl_start; - conflock->fl.fl_end = lock->fl.fl_end; - locks_release_private(&lock->fl); + conflock->svid = conflock->fl.c.flc_pid; + locks_release_private(&conflock->fl); ret = nlm_lck_denied; out: diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index f53d5177f267..5817ef272332 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -117,7 +117,6 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; - struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST called\n"); @@ -127,8 +126,6 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; - test_owner = argp->lock.fl.c.flc_owner; - /* Now check for conflicting locks */ resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock)); @@ -138,7 +135,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) dprintk("lockd: TEST status %d vers %d\n", ntohl(resp->status), rqstp->rq_vers); - nlmsvc_put_lockowner(test_owner); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; diff --git a/fs/locks.c b/fs/locks.c index 04a3f0e20724..bf5e0d05a026 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2185,13 +2185,21 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) /** * vfs_test_lock - test file byte range lock * @filp: The file to test lock for - * @fl: The lock to test; also used to hold result + * @fl: The byte-range in the file to test; also used to hold result * + * On entry, @fl does not contain a lock, but identifies a range (fl_start, fl_end) + * in the file (c.flc_file), and an owner (c.flc_owner) for whom existing locks + * should be ignored. c.flc_type and c.flc_flags are ignored. + * Both fl_lmops and fl_ops in @fl must be NULL. * Returns -ERRNO on failure. Indicates presence of conflicting lock by - * setting conf->fl_type to something other than F_UNLCK. + * setting fl->fl_type to something other than F_UNLCK. + * + * If vfs_test_lock() does find a lock and return it, the caller must + * use locks_free_lock() or locks_release_private() on the returned lock. */ int vfs_test_lock(struct file *filp, struct file_lock *fl) { + WARN_ON_ONCE(fl->fl_ops || fl->fl_lmops); WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); From 8072e34e1387d03102b788677d491e2bcceef6f5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Dec 2025 17:09:55 -0500 Subject: [PATCH 2/4] nfsd: fix nfsd_file reference leak in nfsd4_add_rdaccess_to_wrdeleg() nfsd4_add_rdaccess_to_wrdeleg() unconditionally overwrites fp->fi_fds[O_RDONLY] with a newly acquired nfsd_file. However, if the client already has a SHARE_ACCESS_READ open from a previous OPEN operation, this action overwrites the existing pointer without releasing its reference, orphaning the previous reference. Additionally, the function originally stored the same nfsd_file pointer in both fp->fi_fds[O_RDONLY] and fp->fi_rdeleg_file with only a single reference. When put_deleg_file() runs, it clears fi_rdeleg_file and calls nfs4_file_put_access() to release the file. However, nfs4_file_put_access() only releases fi_fds[O_RDONLY] when the fi_access[O_RDONLY] counter drops to zero. If another READ open exists on the file, the counter remains elevated and the nfsd_file reference from the delegation is never released. This potentially causes open conflicts on that file. Then, on server shutdown, these leaks cause __nfsd_file_cache_purge() to encounter files with an elevated reference count that cannot be cleaned up, ultimately triggering a BUG() in kmem_cache_destroy() because there are still nfsd_file objects allocated in that cache. Fixes: e7a8ebc305f2 ("NFSD: Offer write delegation for OPEN with OPEN4_SHARE_ACCESS_WRITE") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 35004568d43e..11877b96dc4c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1218,8 +1218,10 @@ static void put_deleg_file(struct nfs4_file *fp) if (nf) nfsd_file_put(nf); - if (rnf) + if (rnf) { + nfsd_file_put(rnf); nfs4_file_put_access(fp, NFS4_SHARE_ACCESS_READ); + } } static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct file *f) @@ -6231,10 +6233,14 @@ nfsd4_add_rdaccess_to_wrdeleg(struct svc_rqst *rqstp, struct nfsd4_open *open, fp = stp->st_stid.sc_file; spin_lock(&fp->fi_lock); __nfs4_file_get_access(fp, NFS4_SHARE_ACCESS_READ); - fp = stp->st_stid.sc_file; - fp->fi_fds[O_RDONLY] = nf; - fp->fi_rdeleg_file = nf; + if (!fp->fi_fds[O_RDONLY]) { + fp->fi_fds[O_RDONLY] = nf; + nf = NULL; + } + fp->fi_rdeleg_file = nfsd_file_get(fp->fi_fds[O_RDONLY]); spin_unlock(&fp->fi_lock); + if (nf) + nfsd_file_put(nf); } return true; } From 8f9e967830ff32ab7756f530a36adf74a9f12b76 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Dec 2025 10:52:15 -0500 Subject: [PATCH 3/4] nfsd: use ATTR_DELEG in nfsd4_finalize_deleg_timestamps() When finalizing timestamps that have never been updated and preparing to release the delegation lease, the notify_change() call can trigger a delegation break, and fail to update the timestamps. When this happens, there will be messages like this in dmesg: [ 2709.375785] Unable to update timestamps on inode 00:39:263: -11 Since this code is going to release the lease just after updating the timestamps, breaking the delegation is undesirable. Fix this by setting ATTR_DELEG in ia_valid, in order to avoid the delegation break. Fixes: e5e9b24ab8fa ("nfsd: freeze c/mtime updates with outstanding WRITE_ATTRS delegation") Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 11877b96dc4c..8145014d70d5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1226,7 +1226,7 @@ static void put_deleg_file(struct nfs4_file *fp) static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct file *f) { - struct iattr ia = { .ia_valid = ATTR_ATIME | ATTR_CTIME | ATTR_MTIME }; + struct iattr ia = { .ia_valid = ATTR_ATIME | ATTR_CTIME | ATTR_MTIME | ATTR_DELEG }; struct inode *inode = file_inode(f); int ret; From 1f941b2c23fd34c6f3b76d36f9d0a2528fa92b8f Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Sat, 6 Dec 2025 15:38:42 +0800 Subject: [PATCH 4/4] nfsd: Drop the client reference in client_states_open() In error path, call drop_client() to drop the reference obtained by get_nfsdfs_clp(). Fixes: 78599c42ae3c ("nfsd4: add file to display list of client's opens") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Haoxiang Li Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8145014d70d5..5b83cb33bf83 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3099,8 +3099,10 @@ static int client_states_open(struct inode *inode, struct file *file) return -ENXIO; ret = seq_open(file, &states_seq_ops); - if (ret) + if (ret) { + drop_client(clp); return ret; + } s = file->private_data; s->private = clp; return 0;