From 20ca475d9860e14cf389f5a7d5ba9c6437d74613 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:20 +0100 Subject: [PATCH 01/11] mm: rename call_mmap/mmap_prepare to vfs_mmap/mmap_prepare The call_mmap() function violates the existing convention in include/linux/fs.h whereby invocations of virtual file system hooks are performed by functions prefixed with vfs_xxx(). Correct this by renaming call_mmap() to vfs_mmap(). This also avoids confusion as to the fact that f_op->mmap_prepare may be invoked here. Also rename __call_mmap_prepare() function to vfs_mmap_prepare() and adjust to accept a file parameter; this is useful later for nested file systems. Finally, fix up the VMA userland tests and ensure the mmap_prepare -> mmap shim is implemented there. Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/8d389f4994fa736aa8f9172bef8533c10a9e9011.1750099179.git.lorenzo.stoakes@oracle.com Signed-off-by: Christian Brauner --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 2 +- fs/backing-file.c | 2 +- fs/coda/file.c | 4 +-- include/linux/fs.h | 5 ++-- ipc/shm.c | 2 +- mm/internal.h | 2 +- mm/vma.c | 2 +- tools/testing/vma/vma_internal.h | 32 ++++++++++++++++++---- 8 files changed, 35 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 05e440643aa2..f4f1c979d1b9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -105,7 +105,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * if (!obj->base.filp) return -ENODEV; - ret = call_mmap(obj->base.filp, vma); + ret = vfs_mmap(obj->base.filp, vma); if (ret) return ret; diff --git a/fs/backing-file.c b/fs/backing-file.c index 763fbe9b72b2..04018679bf69 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -339,7 +339,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma, vma_set_file(vma, file); old_cred = 
override_creds(ctx->cred); - ret = call_mmap(vma->vm_file, vma); + ret = vfs_mmap(vma->vm_file, vma); revert_creds(old_cred); if (ctx->accessed) diff --git a/fs/coda/file.c b/fs/coda/file.c index 148856a582a9..2e6ea9319b35 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -199,10 +199,10 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) spin_unlock(&cii->c_lock); vma->vm_file = get_file(host_file); - ret = call_mmap(vma->vm_file, vma); + ret = vfs_mmap(vma->vm_file, vma); if (ret) { - /* if call_mmap fails, our caller will put host_file so we + /* if vfs_mmap fails, our caller will put host_file so we * should drop the reference to the coda_file that we got. */ fput(coda_file); diff --git a/include/linux/fs.h b/include/linux/fs.h index 4ec77da65f14..c66f235f9e4d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2276,7 +2276,7 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); -static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) { if (file->f_op->mmap_prepare) return compat_vma_mmap_prepare(file, vma); @@ -2284,8 +2284,7 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma) return file->f_op->mmap(file, vma); } -static inline int __call_mmap_prepare(struct file *file, - struct vm_area_desc *desc) +static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) { return file->f_op->mmap_prepare(desc); } diff --git a/ipc/shm.c b/ipc/shm.c index 492fcc699985..a9310b6dbbc3 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -602,7 +602,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) if (ret) return ret; - ret = call_mmap(sfd->file, vma); + ret = vfs_mmap(sfd->file, vma); if (ret) { __shm_close(sfd); return ret; diff --git a/mm/internal.h b/mm/internal.h index 6b8ed2017743..0f73ff13c212 100644 --- 
a/mm/internal.h +++ b/mm/internal.h @@ -164,7 +164,7 @@ static inline void *folio_raw_mapping(const struct folio *folio) */ static inline int mmap_file(struct file *file, struct vm_area_struct *vma) { - int err = call_mmap(file, vma); + int err = vfs_mmap(file, vma); if (likely(!err)) return 0; diff --git a/mm/vma.c b/mm/vma.c index fef67a66a095..535b138e26c1 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -2569,7 +2569,7 @@ static int call_mmap_prepare(struct mmap_state *map) }; /* Invoke the hook. */ - err = __call_mmap_prepare(map->file, &desc); + err = vfs_mmap_prepare(map->file, &desc); if (err) return err; diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 14718ca23a05..7ab04700470f 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1442,6 +1442,27 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma) (void)vma; } +/* Declared in vma.h. */ +static inline void set_vma_from_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc); + +static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc); + +static int compat_vma_mmap_prepare(struct file *file, + struct vm_area_struct *vma) +{ + struct vm_area_desc desc; + int err; + + err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + if (err) + return err; + set_vma_from_desc(vma, &desc); + + return 0; +} + /* Did the driver provide valid mmap hook configuration? */ static inline bool file_has_valid_mmap_hooks(struct file *file) { @@ -1451,22 +1472,21 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) /* Hooks are mutually exclusive. 
*/ if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) return false; - if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare)) + if (!has_mmap && !has_mmap_prepare) return false; return true; } -static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } -static inline int __call_mmap_prepare(struct file *file, - struct vm_area_desc *desc) +static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) { return file->f_op->mmap_prepare(desc); } From c6900f227f892b36f9c820e60953fe01a4c1b6fa Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:21 +0100 Subject: [PATCH 02/11] mm/nommu: use file_has_valid_mmap_hooks() helper Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"), the f_op->mmap() hook has been deprecated in favour of f_op->mmap_prepare(). Therefore, update the check for file operations supporting mmap() by using the file_has_valid_mmap_hooks() helper function, which checks for either f_op->mmap or f_op->mmap_prepare rather than checking only for f_op->mmap directly. 
Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/5f120b644b5890d1b50202d0f0d4c9f0d6b62873.1750099179.git.lorenzo.stoakes@oracle.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- mm/nommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/nommu.c b/mm/nommu.c index b624acec6d2e..38c22ea0a95c 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -719,7 +719,7 @@ static int validate_mmap_request(struct file *file, if (file) { /* files must support mmap */ - if (!file->f_op->mmap) + if (!file_has_valid_mmap_hooks(file)) return -ENODEV; /* work out if what we've got could possibly be shared From b013ed403197f3f8c30ddb3ce66fe05a632b3493 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:22 +0100 Subject: [PATCH 03/11] fs: consistently use can_mmap_file() helper Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"), the f_op->mmap() hook has been deprecated in favour of f_op->mmap_prepare(). Additionally, commit bb666b7c2707 ("mm: add mmap_prepare() compatibility layer for nested file systems") permits the use of the .mmap_prepare() hook even in nested filesystems like overlayfs. There are a number of places where we check only for f_op->mmap - this is incorrect now mmap_prepare exists, so update all of these to use the general helper can_mmap_file(). Most notably, this updates the elf logic to allow for the ability to execute binaries on filesystems which have the .mmap_prepare hook, but additionally we update nested filesystems. 
Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/b68145b609532e62bab603dd9686faa6562046ec.1750099179.git.lorenzo.stoakes@oracle.com Acked-by: Kees Cook Signed-off-by: Christian Brauner --- fs/backing-file.c | 2 +- fs/binfmt_elf.c | 4 ++-- fs/binfmt_elf_fdpic.c | 2 +- fs/coda/file.c | 2 +- fs/ecryptfs/file.c | 2 +- include/linux/fs.h | 2 +- mm/mmap.c | 2 +- mm/nommu.c | 2 +- tools/testing/vma/vma_internal.h | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/backing-file.c b/fs/backing-file.c index 04018679bf69..29748953a851 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -333,7 +333,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma, if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) return -EIO; - if (!file->f_op->mmap) + if (!can_mmap_file(file)) return -ENODEV; vma_set_file(vma, file); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a43363d593e5..e3b56b603192 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -646,7 +646,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, if (!elf_check_arch(interp_elf_ex) || elf_check_fdpic(interp_elf_ex)) goto out; - if (!interpreter->f_op->mmap) + if (!can_mmap_file(interpreter)) goto out; total_size = total_mapping_size(interp_elf_phdata, @@ -848,7 +848,7 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; if (elf_check_fdpic(elf_ex)) goto out; - if (!bprm->file->f_op->mmap) + if (!can_mmap_file(bprm->file)) goto out; elf_phdata = load_elf_phdrs(elf_ex, bprm->file); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9133f3827f90..59b138062352 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -109,7 +109,7 @@ static int is_elf(struct elfhdr *hdr, struct file *file) return 0; if (!elf_check_arch(hdr)) return 0; - if (!file->f_op->mmap) + if (!can_mmap_file(file)) return 0; return 1; } diff --git a/fs/coda/file.c b/fs/coda/file.c index 2e6ea9319b35..a390b5d21196 100644 --- a/fs/coda/file.c 
+++ b/fs/coda/file.c @@ -160,7 +160,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) size_t count; int ret; - if (!host_file->f_op->mmap) + if (!can_mmap_file(host_file)) return -ENODEV; if (WARN_ON(coda_file != vma->vm_file)) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index ce0a3c5ed0ca..5f8f96da09fe 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -193,7 +193,7 @@ static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma) * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs * allows recursive mounting, this will need to be extended. */ - if (!lower_file->f_op->mmap) + if (!can_mmap_file(lower_file)) return -ENODEV; return generic_file_mmap(file, vma); } diff --git a/include/linux/fs.h b/include/linux/fs.h index c66f235f9e4d..d4fa1cb0755a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2260,7 +2260,7 @@ struct inode_operations { } ____cacheline_aligned; /* Did the driver provide valid mmap hook configuration? */ -static inline bool file_has_valid_mmap_hooks(struct file *file) +static inline bool can_mmap_file(struct file *file) { bool has_mmap = file->f_op->mmap; bool has_mmap_prepare = file->f_op->mmap_prepare; diff --git a/mm/mmap.c b/mm/mmap.c index 09c563c95112..12c1d060f104 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -475,7 +475,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, vm_flags &= ~VM_MAYEXEC; } - if (!file_has_valid_mmap_hooks(file)) + if (!can_mmap_file(file)) return -ENODEV; if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) return -EINVAL; diff --git a/mm/nommu.c b/mm/nommu.c index 38c22ea0a95c..56a53de10166 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -719,7 +719,7 @@ static int validate_mmap_request(struct file *file, if (file) { /* files must support mmap */ - if (!file_has_valid_mmap_hooks(file)) + if (!can_mmap_file(file)) return -ENODEV; /* work out if what we've got could possibly be shared diff --git a/tools/testing/vma/vma_internal.h 
b/tools/testing/vma/vma_internal.h index 7ab04700470f..816e7e057585 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1464,7 +1464,7 @@ static int compat_vma_mmap_prepare(struct file *file, } /* Did the driver provide valid mmap hook configuration? */ -static inline bool file_has_valid_mmap_hooks(struct file *file) +static inline bool can_mmap_file(struct file *file) { bool has_mmap = file->f_op->mmap; bool has_mmap_prepare = file->f_op->mmap_prepare; From 0335f6afd3488d1101f3b15014095fa51b978253 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:23 +0100 Subject: [PATCH 04/11] fs/dax: make it possible to check dev dax support without a VMA This is a prerequisite for adapting those filesystems to use the .mmap_prepare() hook for mmap()'ing which invoke this check as this hook does not have access to a VMA pointer. To effect this, change the signature of daxdev_mapping_supported() and update its callers (ext4 and xfs mmap()'ing hook code). Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/b09de1e8544384074165d92d048e80058d971286.1750099179.git.lorenzo.stoakes@oracle.com Signed-off-by: Christian Brauner --- fs/ext4/file.c | 2 +- fs/xfs/xfs_file.c | 3 ++- include/linux/dax.h | 16 +++++++++------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 21df81347147..5b8b95936a4c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -821,7 +821,7 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) * We don't support synchronous mappings for non-DAX files and * for DAX files if underneath dax_device is not synchronous. 
*/ - if (!daxdev_mapping_supported(vma, dax_dev)) + if (!daxdev_mapping_supported(vma->vm_flags, file_inode(vma->vm_file), dax_dev)) return -EOPNOTSUPP; file_accessed(file); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 48254a72071b..1160eba8727f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1924,7 +1924,8 @@ xfs_file_mmap( * We don't support synchronous mappings for non-DAX files and * for DAX files if underneath dax_device is not synchronous. */ - if (!daxdev_mapping_supported(vma, target->bt_daxdev)) + if (!daxdev_mapping_supported(vma->vm_flags, file_inode(vma->vm_file), + target->bt_daxdev)) return -EOPNOTSUPP; file_accessed(file); diff --git a/include/linux/dax.h b/include/linux/dax.h index dcc9fcdf14e4..78891518291d 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -65,12 +65,13 @@ size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, /* * Check if given mapping is supported by the file / underlying device. */ -static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, - struct dax_device *dax_dev) +static inline bool daxdev_mapping_supported(vm_flags_t vm_flags, + const struct inode *inode, + struct dax_device *dax_dev) { - if (!(vma->vm_flags & VM_SYNC)) + if (!(vm_flags & VM_SYNC)) return true; - if (!IS_DAX(file_inode(vma->vm_file))) + if (!IS_DAX(inode)) return false; return dax_synchronous(dax_dev); } @@ -110,10 +111,11 @@ static inline void set_dax_nomc(struct dax_device *dax_dev) static inline void set_dax_synchronous(struct dax_device *dax_dev) { } -static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, - struct dax_device *dax_dev) +static inline bool daxdev_mapping_supported(vm_flags_t vm_flags, + const struct inode *inode, + struct dax_device *dax_dev) { - return !(vma->vm_flags & VM_SYNC); + return !(vm_flags & VM_SYNC); } static inline size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) From 
8c90ae8fe5e34a27c500abdff76111c24c321871 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:24 +0100 Subject: [PATCH 05/11] fs/ext4: transition from deprecated .mmap hook to .mmap_prepare Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"), the f_op->mmap() hook has been deprecated in favour of f_op->mmap_prepare(). This callback is invoked in the mmap() logic far earlier, so error handling can be performed more safely without complicated and bug-prone state unwinding required should an error arise. This hook also avoids passing a pointer to a not-yet-correctly-established VMA avoiding any issues with referencing this data structure. It rather provides a pointer to the new struct vm_area_desc descriptor type which contains all required state and allows easy setting of required parameters without any consideration needing to be paid to locking or reference counts. Note that nested filesystems like overlayfs are compatible with an .mmap_prepare() callback since commit bb666b7c2707 ("mm: add mmap_prepare() compatibility layer for nested file systems"). 
Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/5abfe526032a6698fd1bcd074a74165cda7ea57c.1750099179.git.lorenzo.stoakes@oracle.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/ext4/file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 5b8b95936a4c..992b4630eabc 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -804,9 +804,10 @@ static const struct vm_operations_struct ext4_file_vm_ops = { .page_mkwrite = ext4_page_mkwrite, }; -static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) +static int ext4_file_mmap_prepare(struct vm_area_desc *desc) { int ret; + struct file *file = desc->file; struct inode *inode = file->f_mapping->host; struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; @@ -821,15 +822,15 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) * We don't support synchronous mappings for non-DAX files and * for DAX files if underneath dax_device is not synchronous. 
*/ - if (!daxdev_mapping_supported(vma->vm_flags, file_inode(vma->vm_file), dax_dev)) + if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file), dax_dev)) return -EOPNOTSUPP; file_accessed(file); if (IS_DAX(file_inode(file))) { - vma->vm_ops = &ext4_dax_vm_ops; - vm_flags_set(vma, VM_HUGEPAGE); + desc->vm_ops = &ext4_dax_vm_ops; + desc->vm_flags |= VM_HUGEPAGE; } else { - vma->vm_ops = &ext4_file_vm_ops; + desc->vm_ops = &ext4_file_vm_ops; } return 0; } @@ -968,7 +969,7 @@ const struct file_operations ext4_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif - .mmap = ext4_file_mmap, + .mmap_prepare = ext4_file_mmap_prepare, .open = ext4_file_open, .release = ext4_release_file, .fsync = ext4_sync_file, From 6528d29b46d8835f0e7b8b66d052ffbaaf7d5d2d Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:25 +0100 Subject: [PATCH 06/11] fs/xfs: transition from deprecated .mmap hook to .mmap_prepare Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"), the f_op->mmap() hook has been deprecated in favour of f_op->mmap_prepare(). This callback is invoked in the mmap() logic far earlier, so error handling can be performed more safely without complicated and bug-prone state unwinding required should an error arise. This hook also avoids passing a pointer to a not-yet-correctly-established VMA avoiding any issues with referencing this data structure. It rather provides a pointer to the new struct vm_area_desc descriptor type which contains all required state and allows easy setting of required parameters without any consideration needing to be paid to locking or reference counts. Note that nested filesystems like overlayfs are compatible with an .mmap_prepare() callback since commit bb666b7c2707 ("mm: add mmap_prepare() compatibility layer for nested file systems"). 
Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/cba8b29ba5f225df8f63f50182d5f6e0fcf94456.1750099179.git.lorenzo.stoakes@oracle.com Signed-off-by: Christian Brauner --- fs/xfs/xfs_file.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1160eba8727f..afdc7fe83e44 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1913,10 +1913,10 @@ static const struct vm_operations_struct xfs_file_vm_ops = { }; STATIC int -xfs_file_mmap( - struct file *file, - struct vm_area_struct *vma) +xfs_file_mmap_prepare( + struct vm_area_desc *desc) { + struct file *file = desc->file; struct inode *inode = file_inode(file); struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode)); @@ -1924,14 +1924,14 @@ xfs_file_mmap( * We don't support synchronous mappings for non-DAX files and * for DAX files if underneath dax_device is not synchronous. */ - if (!daxdev_mapping_supported(vma->vm_flags, file_inode(vma->vm_file), + if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file), target->bt_daxdev)) return -EOPNOTSUPP; file_accessed(file); - vma->vm_ops = &xfs_file_vm_ops; + desc->vm_ops = &xfs_file_vm_ops; if (IS_DAX(inode)) - vm_flags_set(vma, VM_HUGEPAGE); + desc->vm_flags |= VM_HUGEPAGE; return 0; } @@ -1946,7 +1946,7 @@ const struct file_operations xfs_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, #endif - .mmap = xfs_file_mmap, + .mmap_prepare = xfs_file_mmap_prepare, .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, From 5b44297bcfa49ee197cdb8ca6164bef120c4e73c Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:26 +0100 Subject: [PATCH 07/11] mm/filemap: introduce generic_file_*_mmap_prepare() helpers Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"), the f_op->mmap() hook has been deprecated in favour of f_op->mmap_prepare(). 
The generic mmap handlers are very simple, so we can very easily convert these in advance of converting file systems which use them. This patch does so. Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/30622c1f0b98c66840bc8c02668bda276a810b70.1750099179.git.lorenzo.stoakes@oracle.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 ++++-- mm/filemap.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index d4fa1cb0755a..fd5e7409489d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3393,8 +3393,10 @@ extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); -extern int generic_file_mmap(struct file *, struct vm_area_struct *); -extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); +int generic_file_mmap(struct file *, struct vm_area_struct *); +int generic_file_mmap_prepare(struct vm_area_desc *desc); +int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); +int generic_file_readonly_mmap_prepare(struct vm_area_desc *desc); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); int generic_write_checks_count(struct kiocb *iocb, loff_t *count); extern int generic_write_check_limits(struct file *file, loff_t pos, diff --git a/mm/filemap.c b/mm/filemap.c index bada249b9fb7..77e1bac30490 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3814,6 +3814,18 @@ int generic_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } +int generic_file_mmap_prepare(struct vm_area_desc *desc) +{ + struct file *file = desc->file; + struct address_space *mapping = file->f_mapping; + + if (!mapping->a_ops->read_folio) + return -ENOEXEC; + file_accessed(file); + desc->vm_ops = &generic_file_vm_ops; + return 0; +} + /* * This is for filesystems which do not implement 
->writepage. */ @@ -3823,6 +3835,13 @@ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma) return -EINVAL; return generic_file_mmap(file, vma); } + +int generic_file_readonly_mmap_prepare(struct vm_area_desc *desc) +{ + if (is_shared_maywrite(desc->vm_flags)) + return -EINVAL; + return generic_file_mmap_prepare(desc); +} #else vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) { @@ -3832,15 +3851,25 @@ int generic_file_mmap(struct file *file, struct vm_area_struct *vma) { return -ENOSYS; } +int generic_file_mmap_prepare(struct vm_area_desc *desc) +{ + return -ENOSYS; +} int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma) { return -ENOSYS; } +int generic_file_readonly_mmap_prepare(struct vm_area_desc *desc) +{ + return -ENOSYS; +} #endif /* CONFIG_MMU */ EXPORT_SYMBOL(filemap_page_mkwrite); EXPORT_SYMBOL(generic_file_mmap); +EXPORT_SYMBOL(generic_file_mmap_prepare); EXPORT_SYMBOL(generic_file_readonly_mmap); +EXPORT_SYMBOL(generic_file_readonly_mmap_prepare); static struct folio *do_read_cache_folio(struct address_space *mapping, pgoff_t index, filler_t filler, struct file *file, gfp_t gfp) From 951ea2f4844c22833f8c3201103c7ed817e7e377 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:27 +0100 Subject: [PATCH 08/11] fs: convert simple use of generic_file_*_mmap() to .mmap_prepare() Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"), the f_op->mmap() hook has been deprecated in favour of f_op->mmap_prepare(). We have provided generic .mmap_prepare() equivalents, so update all file systems that specify these directly in their file_operations structures. This updates 9p, adfs, affs, bfs, fat, hfs, hfsplus, hostfs, hpfs, jffs2, jfs, minix, omfs, ramfs and ufs file systems directly. It updates generic_ro_fops which impacts qnx4, cramfs, befs, squashfs, freevxfs, qnx6, efs, romfs, erofs and isofs file systems. 
There are remaining file systems which use generic hooks in a less direct way which we address in a subsequent commit. Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/c7dc90e44a9e75e750939ea369290d6e441a18e6.1750099179.git.lorenzo.stoakes@oracle.com Reviewed-by: Jan Kara Reviewed-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/9p/vfs_file.c | 2 +- fs/adfs/file.c | 2 +- fs/affs/file.c | 2 +- fs/bfs/file.c | 2 +- fs/fat/file.c | 2 +- fs/hfs/inode.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/file.c | 2 +- fs/jffs2/file.c | 2 +- fs/jfs/file.c | 2 +- fs/minix/file.c | 2 +- fs/omfs/file.c | 2 +- fs/ramfs/file-mmu.c | 2 +- fs/read_write.c | 2 +- fs/ufs/file.c | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 348cc90bf9c5..2ff3e0ac7266 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -516,7 +516,7 @@ const struct file_operations v9fs_file_operations = { .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, - .mmap = generic_file_readonly_mmap, + .mmap_prepare = generic_file_readonly_mmap_prepare, .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, diff --git a/fs/adfs/file.c b/fs/adfs/file.c index ee80718aaeec..cd13165fd904 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -25,7 +25,7 @@ const struct file_operations adfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .fsync = generic_file_fsync, .write_iter = generic_file_write_iter, .splice_read = filemap_splice_read, diff --git a/fs/affs/file.c b/fs/affs/file.c index 7a71018e3f67..fbac204b7055 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -999,7 +999,7 @@ const struct file_operations affs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = 
generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .open = affs_file_open, .release = affs_file_release, .fsync = affs_file_fsync, diff --git a/fs/bfs/file.c b/fs/bfs/file.c index fa66a09e496a..6685c3411fe7 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -27,7 +27,7 @@ const struct file_operations bfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .splice_read = filemap_splice_read, }; diff --git a/fs/fat/file.c b/fs/fat/file.c index e887e9ab7472..4fc49a614fb8 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -204,7 +204,7 @@ const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .release = fat_file_release, .unlocked_ioctl = fat_generic_ioctl, .compat_ioctl = compat_ptr_ioctl, diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index a81ce7a740b9..d419586d668d 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -690,7 +690,7 @@ static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .splice_read = filemap_splice_read, .fsync = hfs_file_fsync, .open = hfs_file_open, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index f331e9574217..0af7e302730c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -366,7 +366,7 @@ static const struct file_operations hfsplus_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .splice_read = filemap_splice_read, .fsync = 
hfsplus_file_fsync, .open = hfsplus_file_open, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 702c41317589..bc22b6cc72af 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -382,7 +382,7 @@ static const struct file_operations hostfs_file_fops = { .splice_write = iter_file_splice_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .open = hostfs_open, .release = hostfs_file_release, .fsync = hostfs_fsync, diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 449a3fc1b8d9..a1a44e3edb19 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -255,7 +255,7 @@ const struct file_operations hpfs_file_ops = .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .release = hpfs_file_release, .fsync = hpfs_file_fsync, .splice_read = filemap_splice_read, diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 13c18ccc13b0..1e05f7fe5dd4 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -54,7 +54,7 @@ const struct file_operations jffs2_file_operations = .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .unlocked_ioctl=jffs2_ioctl, - .mmap = generic_file_readonly_mmap, + .mmap_prepare = generic_file_readonly_mmap_prepare, .fsync = jffs2_fsync, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 01b6912e60f8..5e47951db630 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -143,7 +143,7 @@ const struct file_operations jfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .fsync = jfs_fsync, diff --git 
a/fs/minix/file.c b/fs/minix/file.c index 906d192ab7f3..dca7ac71f049 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -17,7 +17,7 @@ const struct file_operations minix_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .fsync = generic_file_fsync, .splice_read = filemap_splice_read, }; diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 98358d405b6a..319c04e63964 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -332,7 +332,7 @@ const struct file_operations omfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .fsync = generic_file_fsync, .splice_read = filemap_splice_read, }; diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index b45c7edc3225..b11f5b20b78b 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -41,7 +41,7 @@ static unsigned long ramfs_mmu_get_unmapped_area(struct file *file, const struct file_operations ramfs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .fsync = noop_fsync, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, diff --git a/fs/read_write.c b/fs/read_write.c index 0ef70e128c4a..80fdab99f9e4 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -28,7 +28,7 @@ const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, - .mmap = generic_file_readonly_mmap, + .mmap_prepare = generic_file_readonly_mmap_prepare, .splice_read = filemap_splice_read, }; diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 487ad1fc2de6..c2a391c17df7 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -38,7 +38,7 @@ const struct file_operations 
ufs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = generic_file_mmap, + .mmap_prepare = generic_file_mmap_prepare, .open = generic_file_open, .fsync = generic_file_fsync, .splice_read = filemap_splice_read, From 9d5403b1036cdcd4be0f9f5568612c0e60e73d79 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:28 +0100 Subject: [PATCH 09/11] fs: convert most other generic_file_*mmap() users to .mmap_prepare() Update nearly all generic_file_mmap() and generic_file_readonly_mmap() callers to use generic_file_mmap_prepare() and generic_file_readonly_mmap_prepare() respectively. We update blkdev, 9p, afs, erofs, ext2, nfs, ntfs3, smb, ubifs and vboxsf file systems this way. Remaining users we cannot yet update are ecryptfs, fuse and cramfs. The former two are nested file systems that must support any underlying file system, and cramfs inserts a mixed mapping which currently requires a VMA. Once all file systems have been converted to mmap_prepare(), we can then update nested file systems. 
Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/08db85970d89b17a995d2cffae96fb4cc462377f.1750099179.git.lorenzo.stoakes@oracle.com Signed-off-by: Christian Brauner --- block/fops.c | 12 ++++++------ fs/9p/vfs_file.c | 11 ++++++----- fs/afs/file.c | 12 ++++++------ fs/erofs/data.c | 16 ++++++++-------- fs/ext2/file.c | 12 +++++++----- fs/nfs/file.c | 13 +++++++------ fs/nfs/internal.h | 2 +- fs/nfs/nfs4file.c | 2 +- fs/ntfs3/file.c | 15 ++++++++------- fs/smb/client/cifsfs.c | 12 ++++++------ fs/smb/client/cifsfs.h | 4 ++-- fs/smb/client/file.c | 16 ++++++++-------- fs/ubifs/file.c | 10 +++++----- fs/vboxsf/file.c | 8 ++++---- 14 files changed, 75 insertions(+), 70 deletions(-) diff --git a/block/fops.c b/block/fops.c index 1309861d4c2c..a0bf4274ce12 100644 --- a/block/fops.c +++ b/block/fops.c @@ -911,14 +911,14 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, return error; } -static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) +static int blkdev_mmap_prepare(struct vm_area_desc *desc) { - struct inode *bd_inode = bdev_file_inode(file); + struct file *file = desc->file; - if (bdev_read_only(I_BDEV(bd_inode))) - return generic_file_readonly_mmap(file, vma); + if (bdev_read_only(I_BDEV(bdev_file_inode(file)))) + return generic_file_readonly_mmap_prepare(desc); - return generic_file_mmap(file, vma); + return generic_file_mmap_prepare(desc); } const struct file_operations def_blk_fops = { @@ -928,7 +928,7 @@ const struct file_operations def_blk_fops = { .read_iter = blkdev_read_iter, .write_iter = blkdev_write_iter, .iopoll = iocb_bio_iopoll, - .mmap = blkdev_mmap, + .mmap_prepare = blkdev_mmap_prepare, .fsync = blkdev_fsync, .unlocked_ioctl = blkdev_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 2ff3e0ac7266..eb0b083da269 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -454,9 +454,10 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, } static int 
-v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma) +v9fs_file_mmap_prepare(struct vm_area_desc *desc) { int retval; + struct file *filp = desc->file; struct inode *inode = file_inode(filp); struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); @@ -464,12 +465,12 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma) if (!(v9ses->cache & CACHE_WRITEBACK)) { p9_debug(P9_DEBUG_CACHE, "(read-only mmap mode)"); - return generic_file_readonly_mmap(filp, vma); + return generic_file_readonly_mmap_prepare(desc); } - retval = generic_file_mmap(filp, vma); + retval = generic_file_mmap_prepare(desc); if (!retval) - vma->vm_ops = &v9fs_mmap_file_vm_ops; + desc->vm_ops = &v9fs_mmap_file_vm_ops; return retval; } @@ -531,7 +532,7 @@ const struct file_operations v9fs_file_operations_dotl = { .release = v9fs_dir_release, .lock = v9fs_file_lock_dotl, .flock = v9fs_file_flock_dotl, - .mmap = v9fs_file_mmap, + .mmap_prepare = v9fs_file_mmap_prepare, .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, diff --git a/fs/afs/file.c b/fs/afs/file.c index fc15497608c6..f66a92294284 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -19,7 +19,7 @@ #include #include "internal.h" -static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); +static int afs_file_mmap_prepare(struct vm_area_desc *desc); static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos, @@ -35,7 +35,7 @@ const struct file_operations afs_file_operations = { .llseek = generic_file_llseek, .read_iter = afs_file_read_iter, .write_iter = netfs_file_write_iter, - .mmap = afs_file_mmap, + .mmap_prepare = afs_file_mmap_prepare, .splice_read = afs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = afs_fsync, @@ -492,16 +492,16 @@ static void afs_drop_open_mmap(struct afs_vnode *vnode) /* * Handle setting up a memory mapping on an AFS 
file. */ -static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int afs_file_mmap_prepare(struct vm_area_desc *desc) { - struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct afs_vnode *vnode = AFS_FS_I(file_inode(desc->file)); int ret; afs_add_open_mmap(vnode); - ret = generic_file_mmap(file, vma); + ret = generic_file_mmap_prepare(desc); if (ret == 0) - vma->vm_ops = &afs_vm_ops; + desc->vm_ops = &afs_vm_ops; else afs_drop_open_mmap(vnode); return ret; diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 6a329c329f43..2430588156b0 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -409,20 +409,20 @@ static const struct vm_operations_struct erofs_dax_vm_ops = { .huge_fault = erofs_dax_huge_fault, }; -static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int erofs_file_mmap_prepare(struct vm_area_desc *desc) { - if (!IS_DAX(file_inode(file))) - return generic_file_readonly_mmap(file, vma); + if (!IS_DAX(file_inode(desc->file))) + return generic_file_readonly_mmap_prepare(desc); - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + if ((desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE)) return -EINVAL; - vma->vm_ops = &erofs_dax_vm_ops; - vm_flags_set(vma, VM_HUGEPAGE); + desc->vm_ops = &erofs_dax_vm_ops; + desc->vm_flags |= VM_HUGEPAGE; return 0; } #else -#define erofs_file_mmap generic_file_readonly_mmap +#define erofs_file_mmap_prepare generic_file_readonly_mmap_prepare #endif static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence) @@ -452,7 +452,7 @@ static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence) const struct file_operations erofs_file_fops = { .llseek = erofs_file_llseek, .read_iter = erofs_file_read_iter, - .mmap = erofs_file_mmap, + .mmap_prepare = erofs_file_mmap_prepare, .get_unmapped_area = thp_get_unmapped_area, .splice_read = filemap_splice_read, }; diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 
10b061ac5bc0..76bddce462fc 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -122,17 +122,19 @@ static const struct vm_operations_struct ext2_dax_vm_ops = { .pfn_mkwrite = ext2_dax_fault, }; -static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) +static int ext2_file_mmap_prepare(struct vm_area_desc *desc) { + struct file *file = desc->file; + if (!IS_DAX(file_inode(file))) - return generic_file_mmap(file, vma); + return generic_file_mmap_prepare(desc); file_accessed(file); - vma->vm_ops = &ext2_dax_vm_ops; + desc->vm_ops = &ext2_dax_vm_ops; return 0; } #else -#define ext2_file_mmap generic_file_mmap +#define ext2_file_mmap_prepare generic_file_mmap_prepare #endif /* @@ -316,7 +318,7 @@ const struct file_operations ext2_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif - .mmap = ext2_file_mmap, + .mmap_prepare = ext2_file_mmap_prepare, .open = ext2_file_open, .release = ext2_release_file, .fsync = ext2_fsync, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 033feeab8c34..b51b75cf981d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -207,24 +207,25 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe EXPORT_SYMBOL_GPL(nfs_file_splice_read); int -nfs_file_mmap(struct file *file, struct vm_area_struct *vma) +nfs_file_mmap_prepare(struct vm_area_desc *desc) { + struct file *file = desc->file; struct inode *inode = file_inode(file); int status; dprintk("NFS: mmap(%pD2)\n", file); - /* Note: generic_file_mmap() returns ENOSYS on nommu systems + /* Note: generic_file_mmap_prepare() returns ENOSYS on nommu systems * so we call that before revalidating the mapping */ - status = generic_file_mmap(file, vma); + status = generic_file_mmap_prepare(desc); if (!status) { - vma->vm_ops = &nfs_file_vm_ops; + desc->vm_ops = &nfs_file_vm_ops; status = nfs_revalidate_mapping(inode, file->f_mapping); } return status; } -EXPORT_SYMBOL_GPL(nfs_file_mmap); +EXPORT_SYMBOL_GPL(nfs_file_mmap_prepare); /* * 
Flush any dirty pages for this process, and check for write errors. @@ -899,7 +900,7 @@ const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read_iter = nfs_file_read, .write_iter = nfs_file_write, - .mmap = nfs_file_mmap, + .mmap_prepare = nfs_file_mmap_prepare, .open = nfs_file_open, .flush = nfs_file_flush, .release = nfs_file_release, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 69c2c10ee658..26551ff09a52 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -432,7 +432,7 @@ loff_t nfs_file_llseek(struct file *, loff_t, int); ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); ssize_t nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -int nfs_file_mmap(struct file *, struct vm_area_struct *); +int nfs_file_mmap_prepare(struct vm_area_desc *); ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); int nfs_file_release(struct inode *, struct file *); int nfs_lock(struct file *, int, struct file_lock *); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 5e9d66f3466c..5c749b6117bb 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -456,7 +456,7 @@ static int nfs4_setlease(struct file *file, int arg, struct file_lease **lease, const struct file_operations nfs4_file_operations = { .read_iter = nfs_file_read, .write_iter = nfs_file_write, - .mmap = nfs_file_mmap, + .mmap_prepare = nfs_file_mmap_prepare, .open = nfs4_file_open, .flush = nfs4_file_flush, .release = nfs_file_release, diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 1e99a35691cd..7f2ec1c7106c 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -261,14 +261,15 @@ out: } /* - * ntfs_file_mmap - file_operations::mmap + * ntfs_file_mmap_prepare - file_operations::mmap_prepare */ -static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) { + struct file *file = desc->file; struct inode *inode = 
file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); - u64 from = ((u64)vma->vm_pgoff << PAGE_SHIFT); - bool rw = vma->vm_flags & VM_WRITE; + u64 from = ((u64)desc->pgoff << PAGE_SHIFT); + bool rw = desc->vm_flags & VM_WRITE; int err; if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) @@ -291,7 +292,7 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) if (rw) { u64 to = min_t(loff_t, i_size_read(inode), - from + vma->vm_end - vma->vm_start); + from + desc->end - desc->start); if (is_sparsed(ni)) { /* Allocate clusters for rw map. */ @@ -319,7 +320,7 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) } } - err = generic_file_mmap(file, vma); + err = generic_file_mmap_prepare(desc); out: return err; } @@ -1331,7 +1332,7 @@ const struct file_operations ntfs_file_operations = { #endif .splice_read = ntfs_file_splice_read, .splice_write = ntfs_file_splice_write, - .mmap = ntfs_file_mmap, + .mmap_prepare = ntfs_file_mmap_prepare, .open = ntfs_file_open, .fsync = generic_file_fsync, .fallocate = ntfs_fallocate, diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 0a5266ecfd15..d1e6b5cf7d99 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1525,7 +1525,7 @@ const struct file_operations cifs_file_ops = { .flock = cifs_flock, .fsync = cifs_fsync, .flush = cifs_flush, - .mmap = cifs_file_mmap, + .mmap_prepare = cifs_file_mmap_prepare, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .llseek = cifs_llseek, @@ -1545,7 +1545,7 @@ const struct file_operations cifs_file_strict_ops = { .flock = cifs_flock, .fsync = cifs_strict_fsync, .flush = cifs_flush, - .mmap = cifs_file_strict_mmap, + .mmap_prepare = cifs_file_strict_mmap_prepare, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .llseek = cifs_llseek, @@ -1565,7 +1565,7 @@ const struct file_operations cifs_file_direct_ops = { .flock = cifs_flock, .fsync = cifs_fsync, .flush = cifs_flush, - 
.mmap = cifs_file_mmap, + .mmap_prepare = cifs_file_mmap_prepare, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, @@ -1583,7 +1583,7 @@ const struct file_operations cifs_file_nobrl_ops = { .release = cifs_close, .fsync = cifs_fsync, .flush = cifs_flush, - .mmap = cifs_file_mmap, + .mmap_prepare = cifs_file_mmap_prepare, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .llseek = cifs_llseek, @@ -1601,7 +1601,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .release = cifs_close, .fsync = cifs_strict_fsync, .flush = cifs_flush, - .mmap = cifs_file_strict_mmap, + .mmap_prepare = cifs_file_strict_mmap_prepare, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .llseek = cifs_llseek, @@ -1619,7 +1619,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .release = cifs_close, .fsync = cifs_fsync, .flush = cifs_flush, - .mmap = cifs_file_mmap, + .mmap_prepare = cifs_file_mmap_prepare, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index b9ec9fe16a98..487f39cff77e 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -103,8 +103,8 @@ extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, loff_t, loff_t, int); extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int); extern int cifs_flush(struct file *, fl_owner_t id); -extern int cifs_file_mmap(struct file *file, struct vm_area_struct *vma); -extern int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma); +int cifs_file_mmap_prepare(struct vm_area_desc *desc); +int cifs_file_strict_mmap_prepare(struct vm_area_desc *desc); extern const struct file_operations cifs_dir_ops; extern int cifs_readdir(struct file *file, struct dir_context *ctx); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c 
index 9835672267d2..3778d5099f8d 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -2995,38 +2995,38 @@ static const struct vm_operations_struct cifs_file_vm_ops = { .page_mkwrite = cifs_page_mkwrite, }; -int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) +int cifs_file_strict_mmap_prepare(struct vm_area_desc *desc) { int xid, rc = 0; - struct inode *inode = file_inode(file); + struct inode *inode = file_inode(desc->file); xid = get_xid(); if (!CIFS_CACHE_READ(CIFS_I(inode))) rc = cifs_zap_mapping(inode); if (!rc) - rc = generic_file_mmap(file, vma); + rc = generic_file_mmap_prepare(desc); if (!rc) - vma->vm_ops = &cifs_file_vm_ops; + desc->vm_ops = &cifs_file_vm_ops; free_xid(xid); return rc; } -int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) +int cifs_file_mmap_prepare(struct vm_area_desc *desc) { int rc, xid; xid = get_xid(); - rc = cifs_revalidate_file(file); + rc = cifs_revalidate_file(desc->file); if (rc) cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", rc); if (!rc) - rc = generic_file_mmap(file, vma); + rc = generic_file_mmap_prepare(desc); if (!rc) - vma->vm_ops = &cifs_file_vm_ops; + desc->vm_ops = &cifs_file_vm_ops; free_xid(xid); return rc; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index bf311c38d9a8..e8121960b829 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1579,17 +1579,17 @@ static const struct vm_operations_struct ubifs_file_vm_ops = { .page_mkwrite = ubifs_vm_page_mkwrite, }; -static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int ubifs_file_mmap_prepare(struct vm_area_desc *desc) { int err; - err = generic_file_mmap(file, vma); + err = generic_file_mmap_prepare(desc); if (err) return err; - vma->vm_ops = &ubifs_file_vm_ops; + desc->vm_ops = &ubifs_file_vm_ops; if (IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT)) - file_accessed(file); + file_accessed(desc->file); return 0; } @@ -1652,7 +1652,7 @@ const struct file_operations ubifs_file_operations 
= { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = ubifs_write_iter, - .mmap = ubifs_file_mmap, + .mmap_prepare = ubifs_file_mmap_prepare, .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, .splice_read = filemap_splice_read, diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c index b492794f8e9a..82afb9430033 100644 --- a/fs/vboxsf/file.c +++ b/fs/vboxsf/file.c @@ -165,13 +165,13 @@ static const struct vm_operations_struct vboxsf_file_vm_ops = { .map_pages = filemap_map_pages, }; -static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma) +static int vboxsf_file_mmap_prepare(struct vm_area_desc *desc) { int err; - err = generic_file_mmap(file, vma); + err = generic_file_mmap_prepare(desc); if (!err) - vma->vm_ops = &vboxsf_file_vm_ops; + desc->vm_ops = &vboxsf_file_vm_ops; return err; } @@ -213,7 +213,7 @@ const struct file_operations vboxsf_reg_fops = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .mmap = vboxsf_file_mmap, + .mmap_prepare = vboxsf_file_mmap_prepare, .open = vboxsf_file_open, .release = vboxsf_file_release, .fsync = noop_fsync, From 2e3b37a7e48f8a52fb708cdbeec9d8af0a5af0c1 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 16 Jun 2025 20:33:29 +0100 Subject: [PATCH 10/11] fs: replace mmap hook with .mmap_prepare for simple mappings Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"), the f_op->mmap() hook has been deprecated in favour of f_op->mmap_prepare(). This callback is invoked in the mmap() logic far earlier, so error handling can be performed more safely without complicated and bug-prone state unwinding required should an error arise. This hook also avoids passing a pointer to a not-yet-correctly-established VMA avoiding any issues with referencing this data structure. 
It rather provides a pointer to the new struct vm_area_desc descriptor type which contains all required state and allows easy setting of required parameters without any consideration needing to be paid to locking or reference counts. Note that nested filesystems like overlayfs are compatible with an .mmap_prepare() callback since commit bb666b7c2707 ("mm: add mmap_prepare() compatibility layer for nested file systems"). In this patch we apply this change to file systems with relatively simple mmap() hook logic - exfat, ceph, f2fs, bcachefs, zonefs, btrfs, ocfs2, orangefs, nilfs2, romfs, ramfs and aio. Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/f528ac4f35b9378931bd800920fee53fc0c5c74d.1750099179.git.lorenzo.stoakes@oracle.com Acked-by: Damien Le Moal Reviewed-by: Jan Kara Reviewed-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/aio.c | 8 ++++---- fs/bcachefs/fs.c | 8 ++++---- fs/btrfs/file.c | 7 ++++--- fs/ceph/addr.c | 6 +++--- fs/ceph/file.c | 2 +- fs/ceph/super.h | 2 +- fs/exfat/file.c | 10 ++++++---- fs/f2fs/file.c | 7 ++++--- fs/nilfs2/file.c | 8 ++++---- fs/ocfs2/file.c | 4 ++-- fs/ocfs2/mmap.c | 5 +++-- fs/ocfs2/mmap.h | 2 +- fs/orangefs/file.c | 10 ++++++---- fs/ramfs/file-nommu.c | 12 ++++++------ fs/romfs/mmap-nommu.c | 6 +++--- fs/zonefs/file.c | 10 ++++++---- 16 files changed, 58 insertions(+), 49 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 793b7b15ec4b..7fc7b6221312 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -392,15 +392,15 @@ static const struct vm_operations_struct aio_ring_vm_ops = { #endif }; -static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) +static int aio_ring_mmap_prepare(struct vm_area_desc *desc) { - vm_flags_set(vma, VM_DONTEXPAND); - vma->vm_ops = &aio_ring_vm_ops; + desc->vm_flags |= VM_DONTEXPAND; + desc->vm_ops = &aio_ring_vm_ops; return 0; } static const struct file_operations aio_ring_fops = { - .mmap = aio_ring_mmap, + .mmap_prepare = aio_ring_mmap_prepare, }; #if 
IS_ENABLED(CONFIG_MIGRATION) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3063a8ddc2df..9c2238edc0e3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1553,11 +1553,11 @@ static const struct vm_operations_struct bch_vm_ops = { .page_mkwrite = bch2_page_mkwrite, }; -static int bch2_mmap(struct file *file, struct vm_area_struct *vma) +static int bch2_mmap_prepare(struct vm_area_desc *desc) { - file_accessed(file); + file_accessed(desc->file); - vma->vm_ops = &bch_vm_ops; + desc->vm_ops = &bch_vm_ops; return 0; } @@ -1740,7 +1740,7 @@ static const struct file_operations bch_file_operations = { .llseek = bch2_llseek, .read_iter = bch2_read_iter, .write_iter = bch2_write_iter, - .mmap = bch2_mmap, + .mmap_prepare = bch2_mmap_prepare, .get_unmapped_area = thp_get_unmapped_area, .fsync = bch2_fsync, .splice_read = filemap_splice_read, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8ce6f45f45e0..06bd30b35b95 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1978,15 +1978,16 @@ static const struct vm_operations_struct btrfs_file_vm_ops = { .page_mkwrite = btrfs_page_mkwrite, }; -static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) +static int btrfs_file_mmap_prepare(struct vm_area_desc *desc) { + struct file *filp = desc->file; struct address_space *mapping = filp->f_mapping; if (!mapping->a_ops->read_folio) return -ENOEXEC; file_accessed(filp); - vma->vm_ops = &btrfs_file_vm_ops; + desc->vm_ops = &btrfs_file_vm_ops; return 0; } @@ -3765,7 +3766,7 @@ const struct file_operations btrfs_file_operations = { .splice_read = filemap_splice_read, .write_iter = btrfs_file_write_iter, .splice_write = iter_file_splice_write, - .mmap = btrfs_file_mmap, + .mmap_prepare = btrfs_file_mmap_prepare, .open = btrfs_file_open, .release = btrfs_release_file, .get_unmapped_area = thp_get_unmapped_area, diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 60a621b00c65..3a6459356e70 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -2330,13 
+2330,13 @@ static const struct vm_operations_struct ceph_vmops = { .page_mkwrite = ceph_page_mkwrite, }; -int ceph_mmap(struct file *file, struct vm_area_struct *vma) +int ceph_mmap_prepare(struct vm_area_desc *desc) { - struct address_space *mapping = file->f_mapping; + struct address_space *mapping = desc->file->f_mapping; if (!mapping->a_ops->read_folio) return -ENOEXEC; - vma->vm_ops = &ceph_vmops; + desc->vm_ops = &ceph_vmops; return 0; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a7254cab44cc..8c06dc4655a8 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -3171,7 +3171,7 @@ const struct file_operations ceph_file_fops = { .llseek = ceph_llseek, .read_iter = ceph_read_iter, .write_iter = ceph_write_iter, - .mmap = ceph_mmap, + .mmap_prepare = ceph_mmap_prepare, .fsync = ceph_fsync, .lock = ceph_lock, .setlease = simple_nosetlease, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index bb0db0cc8003..cf176aab0f82 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1286,7 +1286,7 @@ extern void __ceph_touch_fmode(struct ceph_inode_info *ci, /* addr.c */ extern const struct address_space_operations ceph_aops; extern const struct netfs_request_ops ceph_netfs_ops; -extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); +int ceph_mmap_prepare(struct vm_area_desc *desc); extern int ceph_uninline_data(struct file *file); extern int ceph_pool_perm_check(struct inode *inode, int need); extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 841a5b18e3df..fbd37245f8c4 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -683,13 +683,15 @@ static const struct vm_operations_struct exfat_file_vm_ops = { .page_mkwrite = exfat_page_mkwrite, }; -static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma) +static int exfat_file_mmap_prepare(struct vm_area_desc *desc) { - if (unlikely(exfat_forced_shutdown(file_inode(file)->i_sb))) + struct file *file = desc->file; + + if 
(unlikely(exfat_forced_shutdown(file_inode(desc->file)->i_sb))) return -EIO; file_accessed(file); - vma->vm_ops = &exfat_file_vm_ops; + desc->vm_ops = &exfat_file_vm_ops; return 0; } @@ -710,7 +712,7 @@ const struct file_operations exfat_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = exfat_compat_ioctl, #endif - .mmap = exfat_file_mmap, + .mmap_prepare = exfat_file_mmap_prepare, .fsync = exfat_file_fsync, .splice_read = exfat_splice_read, .splice_write = iter_file_splice_write, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6bd3de64f2a8..7af2b49b7e8a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -532,8 +532,9 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) return -EINVAL; } -static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int f2fs_file_mmap_prepare(struct vm_area_desc *desc) { + struct file *file = desc->file; struct inode *inode = file_inode(file); if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) @@ -543,7 +544,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return -EOPNOTSUPP; file_accessed(file); - vma->vm_ops = &f2fs_file_vm_ops; + desc->vm_ops = &f2fs_file_vm_ops; f2fs_down_read(&F2FS_I(inode)->i_sem); set_inode_flag(inode, FI_MMAP_FILE); @@ -5376,7 +5377,7 @@ const struct file_operations f2fs_file_operations = { .iopoll = iocb_bio_iopoll, .open = f2fs_file_open, .release = f2fs_release_file, - .mmap = f2fs_file_mmap, + .mmap_prepare = f2fs_file_mmap_prepare, .flush = f2fs_file_flush, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 0e3fc5ba33c7..1b8d754db44d 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -125,10 +125,10 @@ static const struct vm_operations_struct nilfs_file_vm_ops = { .page_mkwrite = nilfs_page_mkwrite, }; -static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int nilfs_file_mmap_prepare(struct vm_area_desc *desc) { - file_accessed(file); - 
vma->vm_ops = &nilfs_file_vm_ops; + file_accessed(desc->file); + desc->vm_ops = &nilfs_file_vm_ops; return 0; } @@ -144,7 +144,7 @@ const struct file_operations nilfs_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_compat_ioctl, #endif /* CONFIG_COMPAT */ - .mmap = nilfs_file_mmap, + .mmap_prepare = nilfs_file_mmap_prepare, .open = generic_file_open, /* .release = nilfs_release_file, */ .fsync = nilfs_sync_file, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2056cf08ac1e..21d797ccccd0 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2800,7 +2800,7 @@ const struct inode_operations ocfs2_special_file_iops = { */ const struct file_operations ocfs2_fops = { .llseek = ocfs2_file_llseek, - .mmap = ocfs2_mmap, + .mmap_prepare = ocfs2_mmap_prepare, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, .open = ocfs2_file_open, @@ -2850,7 +2850,7 @@ const struct file_operations ocfs2_dops = { */ const struct file_operations ocfs2_fops_no_plocks = { .llseek = ocfs2_file_llseek, - .mmap = ocfs2_mmap, + .mmap_prepare = ocfs2_mmap_prepare, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, .open = ocfs2_file_open, diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 6a314e9f2b49..50e2faf64c19 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -159,8 +159,9 @@ static const struct vm_operations_struct ocfs2_file_vm_ops = { .page_mkwrite = ocfs2_page_mkwrite, }; -int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) +int ocfs2_mmap_prepare(struct vm_area_desc *desc) { + struct file *file = desc->file; int ret = 0, lock_level = 0; ret = ocfs2_inode_lock_atime(file_inode(file), @@ -171,7 +172,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) } ocfs2_inode_unlock(file_inode(file), lock_level); out: - vma->vm_ops = &ocfs2_file_vm_ops; + desc->vm_ops = &ocfs2_file_vm_ops; return 0; } diff --git a/fs/ocfs2/mmap.h b/fs/ocfs2/mmap.h index 1051507cc684..d21c30de6b8c 100644 --- a/fs/ocfs2/mmap.h +++ b/fs/ocfs2/mmap.h @@ -2,6 
+2,6 @@ #ifndef OCFS2_MMAP_H #define OCFS2_MMAP_H -int ocfs2_mmap(struct file *file, struct vm_area_struct *vma); +int ocfs2_mmap_prepare(struct vm_area_desc *desc); #endif /* OCFS2_MMAP_H */ diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 90c49c0de243..919f99b16834 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -398,8 +398,9 @@ static const struct vm_operations_struct orangefs_file_vm_ops = { /* * Memory map a region of a file. */ -static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int orangefs_file_mmap_prepare(struct vm_area_desc *desc) { + struct file *file = desc->file; int ret; ret = orangefs_revalidate_mapping(file_inode(file)); @@ -410,10 +411,11 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) "orangefs_file_mmap: called on %pD\n", file); /* set the sequential readahead hint */ - vm_flags_mod(vma, VM_SEQ_READ, VM_RAND_READ); + desc->vm_flags |= VM_SEQ_READ; + desc->vm_flags &= ~VM_RAND_READ; file_accessed(file); - vma->vm_ops = &orangefs_file_vm_ops; + desc->vm_ops = &orangefs_file_vm_ops; return 0; } @@ -574,7 +576,7 @@ const struct file_operations orangefs_file_operations = { .read_iter = orangefs_file_read_iter, .write_iter = orangefs_file_write_iter, .lock = orangefs_lock, - .mmap = orangefs_file_mmap, + .mmap_prepare = orangefs_file_mmap_prepare, .open = generic_file_open, .splice_read = orangefs_file_splice_read, .splice_write = iter_file_splice_write, diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 7a6d980e614d..77b8ca2757e0 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -28,7 +28,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file, unsigned long len, unsigned long pgoff, unsigned long flags); -static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); +static int ramfs_nommu_mmap_prepare(struct vm_area_desc *desc); static unsigned ramfs_mmap_capabilities(struct file *file) { @@ -38,7 
+38,7 @@ static unsigned ramfs_mmap_capabilities(struct file *file) const struct file_operations ramfs_file_operations = { .mmap_capabilities = ramfs_mmap_capabilities, - .mmap = ramfs_nommu_mmap, + .mmap_prepare = ramfs_nommu_mmap_prepare, .get_unmapped_area = ramfs_nommu_get_unmapped_area, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, @@ -262,12 +262,12 @@ out: /* * set up a mapping for shared memory segments */ -static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) +static int ramfs_nommu_mmap_prepare(struct vm_area_desc *desc) { - if (!is_nommu_shared_mapping(vma->vm_flags)) + if (!is_nommu_shared_mapping(desc->vm_flags)) return -ENOSYS; - file_accessed(file); - vma->vm_ops = &generic_file_vm_ops; + file_accessed(desc->file); + desc->vm_ops = &generic_file_vm_ops; return 0; } diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index 4520ca413867..4b77c6dc4418 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c @@ -61,9 +61,9 @@ static unsigned long romfs_get_unmapped_area(struct file *file, * permit a R/O mapping to be made directly through onto an MTD device if * possible */ -static int romfs_mmap(struct file *file, struct vm_area_struct *vma) +static int romfs_mmap_prepare(struct vm_area_desc *desc) { - return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS; + return is_nommu_shared_mapping(desc->vm_flags) ? 
0 : -ENOSYS; } static unsigned romfs_mmap_capabilities(struct file *file) @@ -79,7 +79,7 @@ const struct file_operations romfs_ro_fops = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .splice_read = filemap_splice_read, - .mmap = romfs_mmap, + .mmap_prepare = romfs_mmap_prepare, .get_unmapped_area = romfs_get_unmapped_area, .mmap_capabilities = romfs_mmap_capabilities, }; diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 42e2c0065bb3..c1848163b378 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -312,8 +312,10 @@ static const struct vm_operations_struct zonefs_file_vm_ops = { .page_mkwrite = zonefs_filemap_page_mkwrite, }; -static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int zonefs_file_mmap_prepare(struct vm_area_desc *desc) { + struct file *file = desc->file; + /* * Conventional zones accept random writes, so their files can support * shared writable mappings. For sequential zone files, only read @@ -321,11 +323,11 @@ static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) * ordering between msync() and page cache writeback. 
*/ if (zonefs_inode_is_seq(file_inode(file)) && - (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + (desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE)) return -EINVAL; file_accessed(file); - vma->vm_ops = &zonefs_file_vm_ops; + desc->vm_ops = &zonefs_file_vm_ops; return 0; } @@ -850,7 +852,7 @@ const struct file_operations zonefs_file_operations = { .open = zonefs_file_open, .release = zonefs_file_release, .fsync = zonefs_file_fsync, - .mmap = zonefs_file_mmap, + .mmap_prepare = zonefs_file_mmap_prepare, .llseek = zonefs_file_llseek, .read_iter = zonefs_file_read_iter, .write_iter = zonefs_file_write_iter, From 425c8bb39b032bfb338857476eff5bbee324343e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 23 Jul 2025 13:30:36 +0100 Subject: [PATCH 11/11] doc: update porting, vfs documentation to describe mmap_prepare() Now that we have established .mmap_prepare() as the preferred means by which filesystems establish state upon memory mapping of a file, update the VFS and porting documentation to reflect this. As part of this change, additionally update the VFS documentation to contain the current state of the file_operations struct. Signed-off-by: Lorenzo Stoakes Link: https://lore.kernel.org/20250723123036.35472-1-lorenzo.stoakes@oracle.com Signed-off-by: Christian Brauner --- Documentation/filesystems/porting.rst | 12 ++++++++++++ Documentation/filesystems/vfs.rst | 22 ++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 3616d7161dab..48fff4c407f3 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1249,3 +1249,15 @@ Using try_lookup_noperm() will require linux/namei.h to be included. Calling conventions for ->d_automount() have changed; we should *not* grab an extra reference to new mount - it should be returned with refcount 1. 
+ +--- + +**highly recommended** + +The file operations mmap() callback is deprecated in favour of +mmap_prepare(). This passes a pointer to a vm_area_desc to the callback +rather than a VMA, as the VMA at this stage is not yet valid. + +The vm_area_desc provides the minimum required information for a filesystem +to initialise state upon memory mapping of a file-backed region, and output +parameters for the filesystem to set this state. diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index fd32a9a17bfb..c002f50a9cbc 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -1071,12 +1071,14 @@ This describes how the VFS can manipulate an open file. As of kernel struct file_operations { struct module *owner; + fop_flags_t fop_flags; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); - int (*iopoll)(struct kiocb *kiocb, bool spin); + int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, + unsigned int flags); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); @@ -1093,18 +1095,24 @@ This describes how the VFS can manipulate an open file.
As of kernel int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); - int (*setlease)(struct file *, long, struct file_lock **, void **); + void (*splice_eof)(struct file *file); + int (*setlease)(struct file *, int, struct file_lease **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); #ifndef CONFIG_MMU unsigned (*mmap_capabilities)(struct file *); #endif - ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); + ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, + loff_t, size_t, unsigned int); loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); + int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); + int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, + unsigned int poll_flags); + int (*mmap_prepare)(struct vm_area_desc *); }; Again, all methods are called without any locks being held, unless @@ -1144,7 +1152,8 @@ otherwise noted. used on 64 bit kernels. ``mmap`` - called by the mmap(2) system call + called by the mmap(2) system call. Deprecated in favour of + ``mmap_prepare``. ``open`` called by the VFS when an inode should be opened. When the VFS @@ -1221,6 +1230,11 @@ otherwise noted. ``fadvise`` possibly called by the fadvise64() system call. +``mmap_prepare`` + Called by the mmap(2) system call. Allows a filesystem to set up a + file-backed memory mapping, most notably establishing relevant + private state and VMA callbacks. + Note that the file operations are implemented by the specific filesystem in which the inode resides.
When opening a device node (character or block special) most filesystems will call special