mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-11 09:00:12 +00:00
- The 6 patch series "panic: sys_info: Refactor and fix a potential
issue" from Andy Shevchenko fixes a build issue and does some cleanup in
lib/sys_info.c.
- The 9 patch series "Implement mul_u64_u64_div_u64_roundup()" from
David Laight enhances the 64-bit math code on behalf of a PWM driver and
beefs up the test module for these library functions.
- The 2 patch series "scripts/gdb/symbols: make BPF debug info available
to GDB" from Ilya Leoshkevich makes BPF symbol names, sizes, and line
numbers available to the GDB debugger.
- The 4 patch series "Enable hung_task and lockup cases to dump system
info on demand" from Feng Tang adds a sysctl which can be used to cause
additional info dumping when the hung-task and lockup detectors fire.
- The 6 patch series "lib/base64: add generic encoder/decoder, migrate
users" from Kuan-Wei Chiu adds a general base64 encoder/decoder to lib/
and migrates several users away from their private implementations.
- The 2 patch series "rbtree: inline rb_first() and rb_last()" from Eric
Dumazet makes TCP a little faster.
- The 9 patch series "liveupdate: Rework KHO for in-kernel users" from
Pasha Tatashin reworks the KEXEC Handover interfaces in preparation for
Live Update Orchestrator (LUO), and possibly for other future clients.
- The 13 patch series "kho: simplify state machine and enable dynamic
updates" from Pasha Tatashin increases the flexibility of KEXEC
Handover. Also preparation for LUO.
- The 18 patch series "Live Update Orchestrator" from Pasha Tatashin is
a major new feature targeted at cloud environments. Quoting the [0/N]:
This series introduces the Live Update Orchestrator, a kernel subsystem
designed to facilitate live kernel updates using a kexec-based reboot.
This capability is critical for cloud environments, allowing hypervisors
to be updated with minimal downtime for running virtual machines. LUO
achieves this by preserving the state of selected resources, such as
memory, devices and their dependencies, across the kernel transition.
As a key feature, this series includes support for preserving memfd file
descriptors, which allows critical in-memory data, such as guest RAM or
any other large memory region, to be maintained in RAM across the kexec
reboot.
Mike Rapoport merits a mention here, for his extensive review and
testing work.
- The 3 patch series "kexec: reorganize kexec and kdump sysfs" from
Sourabh Jain moves the kexec and kdump sysfs entries from /sys/kernel/
to /sys/kernel/kexec/ and adds back-compatibility symlinks which can
hopefully be removed one day.
- The 2 patch series "kho: fixes for vmalloc restoration" from Mike
Rapoport fixes a BUG which was being hit during KHO restoration of
vmalloc() regions.
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCaTSAkQAKCRDdBJ7gKXxA
jrkiAP9QKfsRv46XZaM5raScjY1ayjP+gqb2rgt6BQ/gZvb2+wD/cPAYOR6BiX52
n0pVpQmG5P/KyOmpLztn96ejL4heKwQ=
=JY96
-----END PGP SIGNATURE-----
Merge tag 'mm-nonmm-stable-2025-12-06-11-14' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:
- "panic: sys_info: Refactor and fix a potential issue" (Andy Shevchenko)
fixes a build issue and does some cleanup in lib/sys_info.c
- "Implement mul_u64_u64_div_u64_roundup()" (David Laight)
enhances the 64-bit math code on behalf of a PWM driver and beefs up
the test module for these library functions
- "scripts/gdb/symbols: make BPF debug info available to GDB" (Ilya Leoshkevich)
makes BPF symbol names, sizes, and line numbers available to the GDB
debugger
- "Enable hung_task and lockup cases to dump system info on demand" (Feng Tang)
adds a sysctl which can be used to cause additional info dumping when
the hung-task and lockup detectors fire
- "lib/base64: add generic encoder/decoder, migrate users" (Kuan-Wei Chiu)
adds a general base64 encoder/decoder to lib/ and migrates several
users away from their private implementations
- "rbtree: inline rb_first() and rb_last()" (Eric Dumazet)
makes TCP a little faster
- "liveupdate: Rework KHO for in-kernel users" (Pasha Tatashin)
reworks the KEXEC Handover interfaces in preparation for Live Update
Orchestrator (LUO), and possibly for other future clients
- "kho: simplify state machine and enable dynamic updates" (Pasha Tatashin)
increases the flexibility of KEXEC Handover. Also preparation for LUO
- "Live Update Orchestrator" (Pasha Tatashin)
is a major new feature targeted at cloud environments. Quoting the
cover letter:
This series introduces the Live Update Orchestrator, a kernel
subsystem designed to facilitate live kernel updates using a
kexec-based reboot. This capability is critical for cloud
environments, allowing hypervisors to be updated with minimal
downtime for running virtual machines. LUO achieves this by
preserving the state of selected resources, such as memory,
devices and their dependencies, across the kernel transition.
As a key feature, this series includes support for preserving
memfd file descriptors, which allows critical in-memory data, such
as guest RAM or any other large memory region, to be maintained in
RAM across the kexec reboot.
Mike Rapoport merits a mention here, for his extensive review and
testing work.
- "kexec: reorganize kexec and kdump sysfs" (Sourabh Jain)
moves the kexec and kdump sysfs entries from /sys/kernel/ to
/sys/kernel/kexec/ and adds back-compatibility symlinks which can
hopefully be removed one day
- "kho: fixes for vmalloc restoration" (Mike Rapoport)
fixes a BUG which was being hit during KHO restoration of vmalloc()
regions
* tag 'mm-nonmm-stable-2025-12-06-11-14' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (139 commits)
calibrate: update header inclusion
Reinstate "resource: avoid unnecessary lookups in find_next_iomem_res()"
vmcoreinfo: track and log recoverable hardware errors
kho: fix restoring of contiguous ranges of order-0 pages
kho: kho_restore_vmalloc: fix initialization of pages array
MAINTAINERS: TPM DEVICE DRIVER: update the W-tag
init: replace simple_strtoul with kstrtoul to improve lpj_setup
KHO: fix boot failure due to kmemleak access to non-PRESENT pages
Documentation/ABI: new kexec and kdump sysfs interface
Documentation/ABI: mark old kexec sysfs deprecated
kexec: move sysfs entries to /sys/kernel/kexec
test_kho: always print restore status
kho: free chunks using free_page() instead of kfree()
selftests/liveupdate: add kexec test for multiple and empty sessions
selftests/liveupdate: add simple kexec-based selftest for LUO
selftests/liveupdate: add userspace API selftests
docs: add documentation for memfd preservation via LUO
mm: memfd_luo: allow preserving memfd
liveupdate: luo_file: add private argument to store runtime state
mm: shmem: export some functions to internal.h
...
254 lines
8.7 KiB
C
254 lines
8.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __SHMEM_FS_H
#define __SHMEM_FS_H

#include <linux/file.h>
#include <linux/swap.h>
#include <linux/mempolicy.h>
#include <linux/pagemap.h>
#include <linux/percpu_counter.h>
#include <linux/xattr.h>
#include <linux/fs_parser.h>
#include <linux/userfaultfd_k.h>
#include <linux/bits.h>

struct swap_iocb;

/* inode in-kernel data */

#ifdef CONFIG_TMPFS_QUOTA
#define SHMEM_MAXQUOTAS 2
#endif

/* Suppress pre-accounting of the entire object size. */
#define SHMEM_F_NORESERVE	BIT(0)
/* Disallow swapping. */
#define SHMEM_F_LOCKED		BIT(1)
/*
 * Disallow growing, shrinking, or hole punching in the inode. Combined with
 * folio pinning, makes sure the inode's mapping stays fixed.
 *
 * In some ways similar to F_SEAL_GROW | F_SEAL_SHRINK, but can be removed and
 * isn't directly visible to userspace.
 */
#define SHMEM_F_MAPPING_FROZEN	BIT(2)
struct shmem_inode_info {
|
|
spinlock_t lock;
|
|
unsigned int seals; /* shmem seals */
|
|
unsigned long flags;
|
|
unsigned long alloced; /* data pages alloced to file */
|
|
unsigned long swapped; /* subtotal assigned to swap */
|
|
union {
|
|
struct offset_ctx dir_offsets; /* stable directory offsets */
|
|
struct {
|
|
struct list_head shrinklist; /* shrinkable hpage inodes */
|
|
struct list_head swaplist; /* chain of maybes on swap */
|
|
};
|
|
};
|
|
struct timespec64 i_crtime; /* file creation time */
|
|
struct shared_policy policy; /* NUMA memory alloc policy */
|
|
struct simple_xattrs xattrs; /* list of xattrs */
|
|
pgoff_t fallocend; /* highest fallocate endindex */
|
|
unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */
|
|
atomic_t stop_eviction; /* hold when working on inode */
|
|
#ifdef CONFIG_TMPFS_QUOTA
|
|
struct dquot __rcu *i_dquot[MAXQUOTAS];
|
|
#endif
|
|
struct inode vfs_inode;
|
|
};
|
|
|
|
/* fsflags masks used with FS_IOC_[SG]ETFLAGS (see fsflags field above) */
#define SHMEM_FL_USER_VISIBLE		(FS_FL_USER_VISIBLE | FS_CASEFOLD_FL)
#define SHMEM_FL_USER_MODIFIABLE \
	(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL)
#define SHMEM_FL_INHERITED		(FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL)
struct shmem_quota_limits {
|
|
qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
|
|
qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */
|
|
qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */
|
|
qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */
|
|
};
|
|
|
|
struct shmem_sb_info {
|
|
unsigned long max_blocks; /* How many blocks are allowed */
|
|
struct percpu_counter used_blocks; /* How many are allocated */
|
|
unsigned long max_inodes; /* How many inodes are allowed */
|
|
unsigned long free_ispace; /* How much ispace left for allocation */
|
|
raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
|
|
umode_t mode; /* Mount mode for root directory */
|
|
unsigned char huge; /* Whether to try for hugepages */
|
|
kuid_t uid; /* Mount uid for root directory */
|
|
kgid_t gid; /* Mount gid for root directory */
|
|
bool full_inums; /* If i_ino should be uint or ino_t */
|
|
bool noswap; /* ignores VM reclaim / swap requests */
|
|
ino_t next_ino; /* The next per-sb inode number to use */
|
|
ino_t __percpu *ino_batch; /* The next per-cpu inode number to use */
|
|
struct mempolicy *mpol; /* default memory policy for mappings */
|
|
spinlock_t shrinklist_lock; /* Protects shrinklist */
|
|
struct list_head shrinklist; /* List of shinkable inodes */
|
|
unsigned long shrinklist_len; /* Length of shrinklist */
|
|
struct shmem_quota_limits qlimits; /* Default quota limits */
|
|
};
|
|
|
|
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
|
|
{
|
|
return container_of(inode, struct shmem_inode_info, vfs_inode);
|
|
}
|
|
|
|
/*
|
|
* Functions in mm/shmem.c called directly from elsewhere:
|
|
*/
|
|
extern const struct fs_parameter_spec shmem_fs_parameters[];
|
|
extern void shmem_init(void);
|
|
extern int shmem_init_fs_context(struct fs_context *fc);
|
|
extern struct file *shmem_file_setup(const char *name,
|
|
loff_t size, unsigned long flags);
|
|
extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
|
|
unsigned long flags);
|
|
extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
|
|
const char *name, loff_t size, unsigned long flags);
|
|
int shmem_zero_setup(struct vm_area_struct *vma);
|
|
int shmem_zero_setup_desc(struct vm_area_desc *desc);
|
|
extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
|
|
unsigned long len, unsigned long pgoff, unsigned long flags);
|
|
extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
|
|
#ifdef CONFIG_SHMEM
|
|
bool shmem_mapping(const struct address_space *mapping);
|
|
#else
|
|
static inline bool shmem_mapping(const struct address_space *mapping)
|
|
{
|
|
return false;
|
|
}
|
|
#endif /* CONFIG_SHMEM */
|
|
void shmem_unlock_mapping(struct address_space *mapping);
|
|
struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
|
|
pgoff_t index, gfp_t gfp_mask);
|
|
int shmem_writeout(struct folio *folio, struct swap_iocb **plug,
|
|
struct list_head *folio_list);
|
|
void shmem_truncate_range(struct inode *inode, loff_t start, uoff_t end);
|
|
int shmem_unuse(unsigned int type);
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
unsigned long shmem_allowable_huge_orders(struct inode *inode,
|
|
struct vm_area_struct *vma, pgoff_t index,
|
|
loff_t write_end, bool shmem_huge_force);
|
|
bool shmem_hpage_pmd_enabled(void);
|
|
#else
|
|
static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
|
|
struct vm_area_struct *vma, pgoff_t index,
|
|
loff_t write_end, bool shmem_huge_force)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline bool shmem_hpage_pmd_enabled(void)
|
|
{
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_SHMEM
|
|
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
|
|
extern void shmem_uncharge(struct inode *inode, long pages);
|
|
#else
|
|
static inline unsigned long shmem_swap_usage(struct vm_area_struct *vma)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void shmem_uncharge(struct inode *inode, long pages)
|
|
{
|
|
}
|
|
#endif
|
|
extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
|
|
pgoff_t start, pgoff_t end);
|
|
|
/* Flag allocation requirements to shmem_get_folio */
enum sgp_type {
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_NOALLOC,	/* similar, but fail on hole or use fallocated page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
};
|
int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
|
|
struct folio **foliop, enum sgp_type sgp);
|
|
struct folio *shmem_read_folio_gfp(struct address_space *mapping,
|
|
pgoff_t index, gfp_t gfp);
|
|
|
|
static inline struct folio *shmem_read_folio(struct address_space *mapping,
|
|
pgoff_t index)
|
|
{
|
|
return shmem_read_folio_gfp(mapping, index, mapping_gfp_mask(mapping));
|
|
}
|
|
|
|
static inline struct page *shmem_read_mapping_page(
|
|
struct address_space *mapping, pgoff_t index)
|
|
{
|
|
return shmem_read_mapping_page_gfp(mapping, index,
|
|
mapping_gfp_mask(mapping));
|
|
}
|
|
|
|
static inline bool shmem_file(struct file *file)
|
|
{
|
|
if (!IS_ENABLED(CONFIG_SHMEM))
|
|
return false;
|
|
if (!file || !file->f_mapping)
|
|
return false;
|
|
return shmem_mapping(file->f_mapping);
|
|
}
|
|
|
|
/* Must be called with inode lock taken exclusive. */
|
|
static inline void shmem_freeze(struct inode *inode, bool freeze)
|
|
{
|
|
if (freeze)
|
|
SHMEM_I(inode)->flags |= SHMEM_F_MAPPING_FROZEN;
|
|
else
|
|
SHMEM_I(inode)->flags &= ~SHMEM_F_MAPPING_FROZEN;
|
|
}
|
|
|
|
/*
|
|
* If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages
|
|
* beyond i_size's notion of EOF, which fallocate has committed to reserving:
|
|
* which split_huge_page() must therefore not delete. This use of a single
|
|
* "fallocend" per inode errs on the side of not deleting a reservation when
|
|
* in doubt: there are plenty of cases when it preserves unreserved pages.
|
|
*/
|
|
static inline pgoff_t shmem_fallocend(struct inode *inode, pgoff_t eof)
|
|
{
|
|
return max(eof, SHMEM_I(inode)->fallocend);
|
|
}
|
|
|
|
extern bool shmem_charge(struct inode *inode, long pages);
|
|
|
|
#ifdef CONFIG_USERFAULTFD
|
|
#ifdef CONFIG_SHMEM
|
|
extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
|
|
struct vm_area_struct *dst_vma,
|
|
unsigned long dst_addr,
|
|
unsigned long src_addr,
|
|
uffd_flags_t flags,
|
|
struct folio **foliop);
|
|
#else /* !CONFIG_SHMEM */
|
|
#define shmem_mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, \
|
|
src_addr, flags, foliop) ({ BUG(); 0; })
|
|
#endif /* CONFIG_SHMEM */
|
|
#endif /* CONFIG_USERFAULTFD */
|
|
/*
 * Used space is stored as unsigned 64-bit value in bytes but
 * quota core supports only signed 64-bit values so use that
 * as a limit
 */
#define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
#define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL

#ifdef CONFIG_TMPFS_QUOTA
extern const struct dquot_operations shmem_quota_operations;
extern struct quota_format_type shmem_quota_format;
#endif /* CONFIG_TMPFS_QUOTA */

#endif /* __SHMEM_FS_H */