mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-12 01:20:14 +00:00
Patch series "ksm: fix exec/fork inheritance", v2.
This series fixes exec/fork inheritance. See the detailed description of
the issue below.
This patch (of 2):
Background
==========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process")
introduced MMF_VM_MERGE_ANY for mm->flags, and allowed user to set it by
prctl() so that the process's VMAs are forcibly scanned by ksmd.
Subsequently, commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
added support for inheriting the MMF_VM_MERGE_ANY flag when a task calls execve().
Finally, commit 3a9e567ca45fb ("mm/ksm: fix ksm exec support for prctl")
fixed the issue that ksmd doesn't scan the mm_struct with MMF_VM_MERGE_ANY
by adding the mm_slot to ksm_mm_head in __bprm_mm_init().
Problem
=======
In some extreme scenarios, however, this inheritance of MMF_VM_MERGE_ANY
during exec/fork can fail. For example, when the scanning frequency of
ksmd is tuned extremely high, a process carrying MMF_VM_MERGE_ANY may
still fail to pass it to the newly exec'd process. This happens because
ksm_execve() is executed too early in the do_execve flow (prematurely
adding the new mm_struct to the ksm_mm_slot list).
As a result, before do_execve completes, ksmd may have already performed a
scan and found that this new mm_struct has no VM_MERGEABLE VMAs, thus
clearing its MMF_VM_MERGE_ANY flag. Consequently, when the new program
executes, the inherited MMF_VM_MERGE_ANY flag is already gone.
Root cause
==========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process") clears
the flag MMF_VM_MERGE_ANY when ksmd finds no VM_MERGEABLE VMAs.
Solution
========
First, don't clear MMF_VM_MERGE_ANY when ksmd finds no VM_MERGEABLE
VMAs, because the mm_struct may have only just been added to the ksm_mm_slot
list, and its process may not yet have officially started running or may not
yet have performed mmap/brk to allocate anonymous VMAs.
Second, recheck MMF_VM_MERGEABLE when a process has MMF_VM_MERGE_ANY set,
and create an mm_slot and add it to the ksm_scan_list
again.
Link: https://lkml.kernel.org/r/20251007182504440BJgK8VXRHh8TD7IGSUIY4@zte.com.cn
Link: https://lkml.kernel.org/r/20251007182821572h_SoFqYZXEP1mvWI4n9VL@zte.com.cn
Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process")
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Cc: Stefan Roesch <shr@devkernel.io>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jinjiang Tu <tujinjiang@huawei.com>
Cc: Wang Yaxin <wang.yaxin@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
165 lines
4.4 KiB
C
165 lines
4.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __LINUX_KSM_H
|
|
#define __LINUX_KSM_H
|
|
/*
|
|
* Memory merging support.
|
|
*
|
|
* This code enables dynamic sharing of identical pages found in different
|
|
* memory areas, even if they are not shared by fork().
|
|
*/
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/rmap.h>
|
|
#include <linux/sched.h>
|
|
|
|
#ifdef CONFIG_KSM
|
|
/*
 * KSM entry points that are only declared in this header; their
 * definitions live elsewhere.
 */
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
		unsigned long end, int advice, vm_flags_t *vm_flags);
vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
			 vm_flags_t vm_flags);
int ksm_enable_merge_any(struct mm_struct *mm);
int ksm_disable_merge_any(struct mm_struct *mm);
int ksm_disable(struct mm_struct *mm);

/*
 * Low-level enter/exit hooks. The inline ksm_fork(), ksm_execve() and
 * ksm_exit() wrappers in this header call them after testing the mm flags.
 */
int __ksm_enter(struct mm_struct *mm);
void __ksm_exit(struct mm_struct *mm);
|
|
/*
 * Zeropages mapped by KSM are identified by reusing the dirty bit of the
 * PTE: a dirty PTE that points at the zero pfn was mapped by KSM when
 * deduplicating memory.
 *
 * Note: @pte is expanded twice, so pass an expression without side effects.
 */
#define is_ksm_zero_pte(pte)	(is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte))
|
|
|
|
/*
 * Global zeropage counter; the inline helpers in this header adjust it
 * together with the per-mm ksm_zero_pages counter.
 */
extern atomic_long_t ksm_zero_pages;
|
|
|
|
static inline void ksm_map_zero_page(struct mm_struct *mm)
|
|
{
|
|
atomic_long_inc(&ksm_zero_pages);
|
|
atomic_long_inc(&mm->ksm_zero_pages);
|
|
}
|
|
|
|
/*
 * If @pte satisfies is_ksm_zero_pte(), decrement both the global zeropage
 * counter and the one embedded in @mm; otherwise do nothing.
 */
static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
{
	if (!is_ksm_zero_pte(pte))
		return;

	atomic_long_dec(&ksm_zero_pages);
	atomic_long_dec(&mm->ksm_zero_pages);
}
|
|
|
|
static inline long mm_ksm_zero_pages(struct mm_struct *mm)
|
|
{
|
|
return atomic_long_read(&mm->ksm_zero_pages);
|
|
}
|
|
|
|
static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
|
|
{
|
|
/* Adding mm to ksm is best effort on fork. */
|
|
if (mm_flags_test(MMF_VM_MERGEABLE, oldmm)) {
|
|
long nr_ksm_zero_pages = atomic_long_read(&mm->ksm_zero_pages);
|
|
|
|
mm->ksm_merging_pages = 0;
|
|
mm->ksm_rmap_items = 0;
|
|
atomic_long_add(nr_ksm_zero_pages, &ksm_zero_pages);
|
|
__ksm_enter(mm);
|
|
}
|
|
}
|
|
|
|
static inline int ksm_execve(struct mm_struct *mm)
|
|
{
|
|
if (mm_flags_test(MMF_VM_MERGE_ANY, mm))
|
|
return __ksm_enter(mm);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline void ksm_exit(struct mm_struct *mm)
|
|
{
|
|
if (mm_flags_test(MMF_VM_MERGEABLE, mm))
|
|
__ksm_exit(mm);
|
|
}
|
|
|
|
/*
 * When do_swap_page() first faults in from swap what used to be a KSM page,
 * no problem, it will be assigned to this vma's anon_vma; but thereafter,
 * it might be faulted into a different anon_vma (or perhaps to a different
 * offset in the same anon_vma).  do_swap_page() cannot do all the locking
 * needed to reconstitute a cross-anon_vma KSM page: for now it has to make
 * a copy, and leave remerging the pages to a later pass of ksmd.
 *
 * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
 * but what if the vma was unmerged while the page was swapped out?
 */
struct folio *ksm_might_need_to_copy(struct folio *folio,
				     struct vm_area_struct *vma,
				     unsigned long addr);
|
|
|
|
void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
|
|
void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
|
|
void collect_procs_ksm(const struct folio *folio, const struct page *page,
|
|
struct list_head *to_kill, int force_early);
|
|
long ksm_process_profit(struct mm_struct *);
|
|
bool ksm_process_mergeable(struct mm_struct *mm);
|
|
|
|
#else /* !CONFIG_KSM */
|
|
|
|
/* !CONFIG_KSM stub: hands @vm_flags back unmodified. */
static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm,
				       const struct file *file,
				       vm_flags_t vm_flags)
{
	return vm_flags;
}
|
|
|
|
/* !CONFIG_KSM stub: always returns 0. */
static inline int ksm_disable(struct mm_struct *mm)
{
	return 0;
}
|
|
|
|
/* !CONFIG_KSM stub: does nothing. */
static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
}
|
|
|
|
/* !CONFIG_KSM stub: always returns 0. */
static inline int ksm_execve(struct mm_struct *mm)
{
	return 0;
}
|
|
|
|
/* !CONFIG_KSM stub: does nothing. */
static inline void ksm_exit(struct mm_struct *mm)
{
}
|
|
|
|
/* !CONFIG_KSM stub: does nothing, so no counters are touched. */
static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
{
}
|
|
|
|
/* !CONFIG_KSM stub: does nothing; @to_kill is left untouched. */
static inline void collect_procs_ksm(const struct folio *folio,
				     const struct page *page,
				     struct list_head *to_kill,
				     int force_early)
{
}
|
|
|
|
#ifdef CONFIG_MMU
|
|
/*
 * !CONFIG_KSM stub (CONFIG_MMU builds): always returns 0 and leaves
 * *@vm_flags untouched.
 */
static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
			      unsigned long end, int advice,
			      vm_flags_t *vm_flags)
{
	return 0;
}
|
|
|
|
/* !CONFIG_KSM stub (CONFIG_MMU builds): returns @folio itself, no copy. */
static inline struct folio *ksm_might_need_to_copy(struct folio *folio,
						   struct vm_area_struct *vma,
						   unsigned long addr)
{
	return folio;
}
|
|
|
|
/* !CONFIG_KSM stub (CONFIG_MMU builds): does nothing. */
static inline void rmap_walk_ksm(struct folio *folio,
				 struct rmap_walk_control *rwc)
{
}
|
|
|
|
/* !CONFIG_KSM stub (CONFIG_MMU builds): does nothing. */
static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old)
{
}
|
|
#endif /* CONFIG_MMU */
|
|
#endif /* !CONFIG_KSM */
|
|
|
|
#endif /* __LINUX_KSM_H */
|