1
0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2026-01-11 17:10:13 +00:00

Compare commits

..

7 Commits

Author SHA1 Message Date
Linus Torvalds
f0b9d8eb98 nfsd-6.19 fixes:
A set of NFSD fixes that arrived after the 6.19 merge window.
 
 Issues that need expedient stable backports:
 - Remove an invalid NFS status code
 - Fix an fstests failure when using pNFS
 - Fix a UAF in v4_end_grace()
 - Fix the administrative interface used to revoke NFSv4 state
 - Fix a memory leak reported by syzbot
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKLLlsBKG3yQ88j7+M2qzM29mf5cFAmlb2WUACgkQM2qzM29m
 f5cqaA/+MbO1kop63/TiNE0tRc34yTBnApg1XVza4vSmcpSgpB8ZKGZ5xOjnRpwg
 yBw9+/puEJhyogPE6JKEGnLiFr+s3ApInFHaxnXnrGZz1RR1qkqfioKudIcpC0s1
 /pKx7y/fktltgo/5Dl0gp2QH3Oytg375ge+dcSQbopSTQYPbsAw7AmoHDPBQd8Nr
 Q/pIu1q/tAM8R2zyijU3eAiUMyYRCrxNVYnlsdYmj7Dn0ypybOyKufkpVCEaS3kO
 a7SV/QSVKdNbZOf8annwAhW+VN4urFmA9nnnr/yirrLJ0i2h18E0txrPFBszhftf
 xpOvaDR7okfEvzqwrHvVfRsqB4nYq9f0TSvvpPsS8vCtq34pWKZPa6iiSxeVL/jb
 EmFtiesUWClZzTIQSpUdbuU80cST6WEoNJJKDPZwF1XbA2navsDqgxKiYxsczjt6
 M5SStHcafK5LrXPruqOhfco/uKTmHNJJlvBWxUGCMQEDvdXdEJ4MIlg8VxxvoWPR
 FQDwU+iSdPOwlG7L3Tl9/PGSNe0MxJSgvzK6JNoKL3LvDx80FtMErWxPJdqdIL0+
 RpBsW7zaCyX9lwD866Frs4K2H1w2XFeQjOMI0Pz1SG9dZ8NoKJ+lzcwVY7GgHUvq
 NUNJLzL6MVCHytwTfqrSY7PGvUCrDqR102FQusQyplT4edcUv0M=
 =okQh
 -----END PGP SIGNATURE-----

Merge tag 'nfsd-6.19-3' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd fixes from Chuck Lever:
 "A set of NFSD fixes for stable that arrived after the merge window:

   - Remove an invalid NFS status code

   - Fix an fstests failure when using pNFS

   - Fix a UAF in v4_end_grace()

   - Fix the administrative interface used to revoke NFSv4 state

   - Fix a memory leak reported by syzbot"

* tag 'nfsd-6.19-3' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux:
  NFSD: net ref data still needs to be freed even if net hasn't startup
  nfsd: check that server is running in unlock_filesystem
  nfsd: use correct loop termination in nfsd4_revoke_states()
  nfsd: provide locking for v4_end_grace
  NFSD: Fix permission check for read access to executable-only files
  NFSD: Remove NFSERR_EAGAIN
2026-01-06 09:12:52 -08:00
Edward Adam Davis
0b88bfa42e NFSD: net ref data still needs to be freed even if net hasn't startup
When the NFSD instance doesn't to startup, the net ref data memory is
not properly reclaimed, which triggers the memory leak issue reported
by syzbot [1].

To avoid the problem reported in [1], the net ref data memory reclamation
action is moved outside of nfsd_net_up when the net is shutdown.

[1]
unreferenced object 0xffff88812a39dfc0 (size 64):
  backtrace (crc a2262fc6):
    percpu_ref_init+0x94/0x1e0 lib/percpu-refcount.c:76
    nfsd_create_serv+0xbe/0x260 fs/nfsd/nfssvc.c:605
    nfsd_nl_listener_set_doit+0x62/0xb00 fs/nfsd/nfsctl.c:1882
    genl_family_rcv_msg_doit+0x11e/0x190 net/netlink/genetlink.c:1115
    genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline]
    genl_rcv_msg+0x2fd/0x440 net/netlink/genetlink.c:1210

BUG: memory leak

Reported-by: syzbot+6ee3b889bdeada0a6226@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=6ee3b889bdeada0a6226
Fixes: 39972494e318 ("nfsd: update percpu_ref to manage references on nfsd_net")
Cc: stable@vger.kernel.org
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-01-02 13:50:14 -05:00
Olga Kornievskaia
d0424066fc nfsd: check that server is running in unlock_filesystem
If we are trying to unlock the filesystem via an administrative
interface and nfsd isn't running, it crashes the server. This
happens currently because nfsd4_revoke_states() access state
structures (eg., conf_id_hashtbl) that has been freed as a part
of the server shutdown.

[   59.465072] Call trace:
[   59.465308]  nfsd4_revoke_states+0x1b4/0x898 [nfsd] (P)
[   59.465830]  write_unlock_fs+0x258/0x440 [nfsd]
[   59.466278]  nfsctl_transaction_write+0xb0/0x120 [nfsd]
[   59.466780]  vfs_write+0x1f0/0x938
[   59.467088]  ksys_write+0xfc/0x1f8
[   59.467395]  __arm64_sys_write+0x74/0xb8
[   59.467746]  invoke_syscall.constprop.0+0xdc/0x1e8
[   59.468177]  do_el0_svc+0x154/0x1d8
[   59.468489]  el0_svc+0x40/0xe0
[   59.468767]  el0t_64_sync_handler+0xa0/0xe8
[   59.469138]  el0t_64_sync+0x1ac/0x1b0

Ensure this can't happen by taking the nfsd_mutex and checking that
the server is still up, and then holding the mutex across the call to
nfsd4_revoke_states().

Reviewed-by: NeilBrown <neil@brown.name>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Fixes: 1ac3629bf0125 ("nfsd: prepare for supporting admin-revocation of state")
Cc: stable@vger.kernel.org
Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-01-02 13:49:55 -05:00
NeilBrown
fb321998de nfsd: use correct loop termination in nfsd4_revoke_states()
The loop in nfsd4_revoke_states() stops one too early because
the end value given is CLIENT_HASH_MASK where it should be
CLIENT_HASH_SIZE.

This means that an admin request to drop all locks for a filesystem will
miss locks held by clients which hash to the maximum possible hash value.

Fixes: 1ac3629bf012 ("nfsd: prepare for supporting admin-revocation of state")
Cc: stable@vger.kernel.org
Signed-off-by: NeilBrown <neil@brown.name>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-01-02 13:49:38 -05:00
NeilBrown
2857bd59fe nfsd: provide locking for v4_end_grace
Writing to v4_end_grace can race with server shutdown and result in
memory being accessed after it was freed - reclaim_str_hashtbl in
particularly.

We cannot hold nfsd_mutex across the nfsd4_end_grace() call as that is
held while client_tracking_op->init() is called and that can wait for
an upcall to nfsdcltrack which can write to v4_end_grace, resulting in a
deadlock.

nfsd4_end_grace() is also called by the landromat work queue and this
doesn't require locking as server shutdown will stop the work and wait
for it before freeing anything that nfsd4_end_grace() might access.

However, we must be sure that writing to v4_end_grace doesn't restart
the work item after shutdown has already waited for it.  For this we
add a new flag protected with nn->client_lock.  It is set only while it
is safe to make client tracking calls, and v4_end_grace only schedules
work while the flag is set with the spinlock held.

So this patch adds a nfsd_net field "client_tracking_active" which is
set as described.  Another field "grace_end_forced", is set when
v4_end_grace is written.  After this is set, and providing
client_tracking_active is set, the laundromat is scheduled.
This "grace_end_forced" field bypasses other checks for whether the
grace period has finished.

This resolves a race which can result in use-after-free.

Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
Closes: https://lore.kernel.org/linux-nfs/20250623030015.2353515-1-neil@brown.name/T/#t
Fixes: 7f5ef2e900d9 ("nfsd: add a v4_end_grace file to /proc/fs/nfsd")
Cc: stable@vger.kernel.org
Signed-off-by: NeilBrown <neil@brown.name>
Tested-by: Li Lingfeng <lilingfeng3@huawei.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-01-02 13:48:22 -05:00
Scott Mayhew
e901c7fce5 NFSD: Fix permission check for read access to executable-only files
Commit abc02e5602f7 ("NFSD: Support write delegations in LAYOUTGET")
added NFSD_MAY_OWNER_OVERRIDE to the access flags passed from
nfsd4_layoutget() to fh_verify().  This causes LAYOUTGET to fail for
executable-only files, and causes xfstests generic/126 to fail on
pNFS SCSI.

To allow read access to executable-only files, what we really want is:
1. The "permissions" portion of the access flags (the lower 6 bits)
   must be exactly NFSD_MAY_READ
2. The "hints" portion of the access flags (the upper 26 bits) can
   contain any combination of NFSD_MAY_OWNER_OVERRIDE and
   NFSD_MAY_READ_IF_EXEC

Fixes: abc02e5602f7 ("NFSD: Support write delegations in LAYOUTGET")
Cc: stable@vger.kernel.org # v6.6+
Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-01-02 13:44:05 -05:00
Chuck Lever
c6c209ceb8 NFSD: Remove NFSERR_EAGAIN
I haven't found an NFSERR_EAGAIN in RFCs 1094, 1813, 7530, or 8881.
None of these RFCs have an NFS status code that match the numeric
value "11".

Based on the meaning of the EAGAIN errno, I presume the use of this
status in NFSD means NFS4ERR_DELAY. So replace the one usage of
nfserr_eagain, and remove it from NFSD's NFS status conversion
tables.

As far as I can tell, NFSERR_EAGAIN has existed since the pre-git
era, but was not actually used by any code until commit f4e44b393389
("NFSD: delay unmount source's export after inter-server copy
completed."), at which time it become possible for NFSD to return
a status code of 11 (which is not valid NFS protocol).

Fixes: f4e44b393389 ("NFSD: delay unmount source's export after inter-server copy completed.")
Cc: stable@vger.kernel.org
Reviewed-by: NeilBrown <neil@brown.name>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2026-01-02 13:43:41 -05:00
11 changed files with 74 additions and 34 deletions

View File

@ -17,7 +17,6 @@ static const struct {
{ NFSERR_NOENT, -ENOENT },
{ NFSERR_IO, -EIO },
{ NFSERR_NXIO, -ENXIO },
/* { NFSERR_EAGAIN, -EAGAIN }, */
{ NFSERR_ACCES, -EACCES },
{ NFSERR_EXIST, -EEXIST },
{ NFSERR_XDEV, -EXDEV },

View File

@ -66,6 +66,8 @@ struct nfsd_net {
struct lock_manager nfsd4_manager;
bool grace_ended;
bool grace_end_forced;
bool client_tracking_active;
time64_t boot_time;
struct dentry *nfsd_client_dir;

View File

@ -1502,7 +1502,7 @@ try_again:
(schedule_timeout(20*HZ) == 0)) {
finish_wait(&nn->nfsd_ssc_waitq, &wait);
kfree(work);
return nfserr_eagain;
return nfserr_jukebox;
}
finish_wait(&nn->nfsd_ssc_waitq, &wait);
goto try_again;

View File

@ -84,7 +84,7 @@ static u64 current_sessionid = 1;
/* forward declarations */
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
void nfsd4_end_grace(struct nfsd_net *nn);
static void nfsd4_end_grace(struct nfsd_net *nn);
static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
static void nfsd4_file_hash_remove(struct nfs4_file *fi);
static void deleg_reaper(struct nfsd_net *nn);
@ -1759,7 +1759,7 @@ static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
/**
* nfsd4_revoke_states - revoke all nfsv4 states associated with given filesystem
* @net: used to identify instance of nfsd (there is one per net namespace)
* @nn: used to identify instance of nfsd (there is one per net namespace)
* @sb: super_block used to identify target filesystem
*
* All nfs4 states (open, lock, delegation, layout) held by the server instance
@ -1771,16 +1771,15 @@ static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
* The clients which own the states will subsequently being notified that the
* states have been "admin-revoked".
*/
void nfsd4_revoke_states(struct net *net, struct super_block *sb)
void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
unsigned int idhashval;
unsigned int sc_types;
sc_types = SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG | SC_TYPE_LAYOUT;
spin_lock(&nn->client_lock);
for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
for (idhashval = 0; idhashval < CLIENT_HASH_SIZE; idhashval++) {
struct list_head *head = &nn->conf_id_hashtbl[idhashval];
struct nfs4_client *clp;
retry:
@ -6570,7 +6569,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfs_ok;
}
void
static void
nfsd4_end_grace(struct nfsd_net *nn)
{
/* do nothing if grace period already ended */
@ -6603,6 +6602,33 @@ nfsd4_end_grace(struct nfsd_net *nn)
*/
}
/**
* nfsd4_force_end_grace - forcibly end the NFSv4 grace period
* @nn: network namespace for the server instance to be updated
*
* Forces bypass of normal grace period completion, then schedules
* the laundromat to end the grace period immediately. Does not wait
* for the grace period to fully terminate before returning.
*
* Return values:
* %true: Grace termination schedule
* %false: No action was taken
*/
bool nfsd4_force_end_grace(struct nfsd_net *nn)
{
if (!nn->client_tracking_ops)
return false;
spin_lock(&nn->client_lock);
if (nn->grace_ended || !nn->client_tracking_active) {
spin_unlock(&nn->client_lock);
return false;
}
WRITE_ONCE(nn->grace_end_forced, true);
mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
spin_unlock(&nn->client_lock);
return true;
}
/*
* If we've waited a lease period but there are still clients trying to
* reclaim, wait a little longer to give them a chance to finish.
@ -6612,6 +6638,8 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
time64_t double_grace_period_end = nn->boot_time +
2 * nn->nfsd4_lease;
if (READ_ONCE(nn->grace_end_forced))
return false;
if (nn->track_reclaim_completes &&
atomic_read(&nn->nr_reclaim_complete) ==
nn->reclaim_str_hashtbl_size)
@ -8932,6 +8960,8 @@ static int nfs4_state_create_net(struct net *net)
nn->unconf_name_tree = RB_ROOT;
nn->boot_time = ktime_get_real_seconds();
nn->grace_ended = false;
nn->grace_end_forced = false;
nn->client_tracking_active = false;
nn->nfsd4_manager.block_opens = true;
INIT_LIST_HEAD(&nn->nfsd4_manager.list);
INIT_LIST_HEAD(&nn->client_lru);
@ -9012,6 +9042,10 @@ nfs4_state_start_net(struct net *net)
return ret;
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
/* safe for laundromat to run now */
spin_lock(&nn->client_lock);
nn->client_tracking_active = true;
spin_unlock(&nn->client_lock);
if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
goto skip_grace;
printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n",
@ -9060,6 +9094,9 @@ nfs4_state_shutdown_net(struct net *net)
shrinker_free(nn->nfsd_client_shrinker);
cancel_work_sync(&nn->nfsd_shrinker_work);
spin_lock(&nn->client_lock);
nn->client_tracking_active = false;
spin_unlock(&nn->client_lock);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);

View File

@ -259,6 +259,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
struct path path;
char *fo_path;
int error;
struct nfsd_net *nn;
/* sanity check */
if (size == 0)
@ -285,7 +286,13 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
* 3. Is that directory the root of an exported file system?
*/
error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
nfsd4_revoke_states(netns(file), path.dentry->d_sb);
mutex_lock(&nfsd_mutex);
nn = net_generic(netns(file), nfsd_net_id);
if (nn->nfsd_serv)
nfsd4_revoke_states(nn, path.dentry->d_sb);
else
error = -EINVAL;
mutex_unlock(&nfsd_mutex);
path_put(&path);
return error;
@ -1082,10 +1089,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case 'Y':
case 'y':
case '1':
if (!nn->nfsd_serv)
if (!nfsd4_force_end_grace(nn))
return -EBUSY;
trace_nfsd_end_grace(netns(file));
nfsd4_end_grace(nn);
break;
default:
return -EINVAL;

View File

@ -233,7 +233,6 @@ void nfsd_lockd_shutdown(void);
#define nfserr_noent cpu_to_be32(NFSERR_NOENT)
#define nfserr_io cpu_to_be32(NFSERR_IO)
#define nfserr_nxio cpu_to_be32(NFSERR_NXIO)
#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN)
#define nfserr_acces cpu_to_be32(NFSERR_ACCES)
#define nfserr_exist cpu_to_be32(NFSERR_EXIST)
#define nfserr_xdev cpu_to_be32(NFSERR_XDEV)

View File

@ -406,26 +406,26 @@ static void nfsd_shutdown_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
if (!nn->nfsd_net_up)
return;
if (nn->nfsd_net_up) {
percpu_ref_kill_and_confirm(&nn->nfsd_net_ref, nfsd_net_done);
wait_for_completion(&nn->nfsd_net_confirm_done);
percpu_ref_kill_and_confirm(&nn->nfsd_net_ref, nfsd_net_done);
wait_for_completion(&nn->nfsd_net_confirm_done);
nfsd_export_flush(net);
nfs4_state_shutdown_net(net);
nfsd_reply_cache_shutdown(nn);
nfsd_file_cache_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
nn->lockd_up = false;
nfsd_export_flush(net);
nfs4_state_shutdown_net(net);
nfsd_reply_cache_shutdown(nn);
nfsd_file_cache_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
nn->lockd_up = false;
}
wait_for_completion(&nn->nfsd_net_free_done);
}
wait_for_completion(&nn->nfsd_net_free_done);
percpu_ref_exit(&nn->nfsd_net_ref);
if (nn->nfsd_net_up)
nfsd_shutdown_generic();
nn->nfsd_net_up = false;
nfsd_shutdown_generic();
}
static DEFINE_SPINLOCK(nfsd_notifier_lock);

View File

@ -841,15 +841,15 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
struct nfsd_file *find_any_file(struct nfs4_file *f);
#ifdef CONFIG_NFSD_V4
void nfsd4_revoke_states(struct net *net, struct super_block *sb);
void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb);
#else
static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
static inline void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb)
{
}
#endif
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
bool nfsd4_force_end_grace(struct nfsd_net *nn);
/* nfs4recover operations */
extern int nfsd4_client_tracking_init(struct net *net);

View File

@ -2865,8 +2865,8 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
/* Allow read access to binaries even when mode 111 */
if (err == -EACCES && S_ISREG(inode->i_mode) &&
(acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
(((acc & NFSD_MAY_MASK) == NFSD_MAY_READ) &&
(acc & (NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_READ_IF_EXEC))))
err = inode_permission(&nop_mnt_idmap, inode, MAY_EXEC);
return err? nfserrno(err) : 0;

View File

@ -16,7 +16,6 @@ TRACE_DEFINE_ENUM(NFSERR_PERM);
TRACE_DEFINE_ENUM(NFSERR_NOENT);
TRACE_DEFINE_ENUM(NFSERR_IO);
TRACE_DEFINE_ENUM(NFSERR_NXIO);
TRACE_DEFINE_ENUM(NFSERR_EAGAIN);
TRACE_DEFINE_ENUM(NFSERR_ACCES);
TRACE_DEFINE_ENUM(NFSERR_EXIST);
TRACE_DEFINE_ENUM(NFSERR_XDEV);
@ -52,7 +51,6 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
{ NFSERR_NXIO, "NXIO" }, \
{ ECHILD, "CHILD" }, \
{ ETIMEDOUT, "TIMEDOUT" }, \
{ NFSERR_EAGAIN, "AGAIN" }, \
{ NFSERR_ACCES, "ACCES" }, \
{ NFSERR_EXIST, "EXIST" }, \
{ NFSERR_XDEV, "XDEV" }, \

View File

@ -49,7 +49,6 @@
NFSERR_NOENT = 2, /* v2 v3 v4 */
NFSERR_IO = 5, /* v2 v3 v4 */
NFSERR_NXIO = 6, /* v2 v3 v4 */
NFSERR_EAGAIN = 11, /* v2 v3 */
NFSERR_ACCES = 13, /* v2 v3 v4 */
NFSERR_EXIST = 17, /* v2 v3 v4 */
NFSERR_XDEV = 18, /* v3 v4 */