mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-11 17:10:13 +00:00
Patch series "Add and use memdesc_flags_t". At some point struct page will be separated from struct slab and struct folio. This is a step towards that by introducing a type for the 'flags' word of all three structures. This gives us a certain amount of type safety by establishing that some of these unsigned longs are different from other unsigned longs in that they contain things like node ID, section number and zone number in the upper bits. That lets us have functions that can be easily called by anyone who has a slab, folio or page (but not easily by anyone else) to get the node or zone. There's going to be some unusual merge problems with this as some odd bits of the kernel decide they want to print out the flags value or something similar by writing page->flags and now they'll need to write page->flags.f instead. That's most of the churn here. Maybe we should be removing these things from the debug output? This patch (of 11): Wrap the unsigned long flags in a typedef. In upcoming patches, this will provide a strong hint that you can't just pass a random unsigned long to functions which take this as an argument. [willy@infradead.org: s/flags/flags.f/ in several architectures] Link: https://lkml.kernel.org/r/aKMgPRLD-WnkPxYm@casper.infradead.org [nicola.vetrini@gmail.com: mips: fix compilation error] Link: https://lore.kernel.org/lkml/CA+G9fYvkpmqGr6wjBNHY=dRp71PLCoi2341JxOudi60yqaeUdg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20250825214245.1838158-1-nicola.vetrini@gmail.com Link: https://lkml.kernel.org/r/20250805172307.1302730-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250805172307.1302730-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> Acked-by: Zi Yan <ziy@nvidia.com> Cc: Shakeel Butt <shakeel.butt@linux.dev> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
259 lines
7.7 KiB
C
259 lines
7.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* IBM System z Huge TLB Page Support for Kernel.
|
|
*
|
|
* Copyright IBM Corp. 2007,2020
|
|
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
|
|
*/
|
|
|
|
#define KMSG_COMPONENT "hugetlb"
|
|
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
|
|
|
|
#include <linux/cpufeature.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/security.h>
|
|
#include <asm/pgalloc.h>
|
|
|
|
/*
 * move_set_bit(x, a, b) - relocate one bit of "x".
 *
 * "a" and "b" are single-bit bitmasks. If the bit selected by "a" is set
 * in "x", the result has the bit selected by "b" set; otherwise the
 * result is zero. Note that "a" is evaluated twice.
 */
#define move_set_bit(x, a, b)	((((x) & (a)) >> ilog2(a)) << ilog2(b))
|
|
|
|
/*
 * Translate a pte into the matching pmd / pud (rste) value.
 *
 * A pte that is neither present nor none is treated as a swap pte and is
 * re-encoded via mk_swap_rste(); a none pte yields _SEGMENT_ENTRY_EMPTY.
 * For a present pte the individual bits are moved as follows:
 *
 * Convert encoding		  pte bits	pmd / pud bits
 *				lIR.uswrdy.p	dy..R...I...wr
 * empty			010.000000.0 -> 00..0...1...00
 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
 * read-only, clean, old	111.000100.1 -> 00..1...1...01
 * read-only, clean, young	101.000101.1 -> 01..1...0...01
 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
 * read-write, clean, old	111.001100.1 -> 00..1...1...11
 * read-write, clean, young	101.001101.1 -> 01..1...0...11
 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
 * HW-bits: R read-only, I invalid
 * SW-bits: p present, y young, d dirty, r read, w write, s special,
 *	    u unused, l large
 */
static inline unsigned long __pte_to_rste(pte_t pte)
{
	unsigned long pteval, rste;
	swp_entry_t swp;

	if (!pte_present(pte)) {
		if (pte_none(pte))
			return _SEGMENT_ENTRY_EMPTY;

		/* Not present and not none: swap pte */
		swp = __pte_to_swp_entry(pte);
		return mk_swap_rste(__swp_type(swp), __swp_offset(swp));
	}

	pteval = pte_val(pte);

	/* Keep the page-aligned part and mark the entry present */
	rste = (pteval & PAGE_MASK) | _SEGMENT_ENTRY_PRESENT;

	/* Relocate each pte bit to its pmd / pud position */
	rste |= move_set_bit(pteval, _PAGE_READ, _SEGMENT_ENTRY_READ);
	rste |= move_set_bit(pteval, _PAGE_WRITE, _SEGMENT_ENTRY_WRITE);
	rste |= move_set_bit(pteval, _PAGE_INVALID, _SEGMENT_ENTRY_INVALID);
	rste |= move_set_bit(pteval, _PAGE_PROTECT, _SEGMENT_ENTRY_PROTECT);
	rste |= move_set_bit(pteval, _PAGE_DIRTY, _SEGMENT_ENTRY_DIRTY);
	rste |= move_set_bit(pteval, _PAGE_YOUNG, _SEGMENT_ENTRY_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
	rste |= move_set_bit(pteval, _PAGE_SOFT_DIRTY,
			     _SEGMENT_ENTRY_SOFT_DIRTY);
#endif
	rste |= move_set_bit(pteval, _PAGE_NOEXEC, _SEGMENT_ENTRY_NOEXEC);

	return rste;
}
|
|
|
|
static inline pte_t __rste_to_pte(unsigned long rste)
|
|
{
|
|
swp_entry_t arch_entry;
|
|
unsigned long pteval;
|
|
int present, none;
|
|
pte_t pte;
|
|
|
|
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
|
|
present = pud_present(__pud(rste));
|
|
none = pud_none(__pud(rste));
|
|
} else {
|
|
present = pmd_present(__pmd(rste));
|
|
none = pmd_none(__pmd(rste));
|
|
}
|
|
|
|
/*
|
|
* Convert encoding pmd / pud bits pte bits
|
|
* dy..R...I...wr lIR.uswrdy.p
|
|
* empty 00..0...1...00 -> 010.000000.0
|
|
* prot-none, clean, old 00..1...1...00 -> 111.000000.1
|
|
* prot-none, clean, young 01..1...1...00 -> 111.000001.1
|
|
* prot-none, dirty, old 10..1...1...00 -> 111.000010.1
|
|
* prot-none, dirty, young 11..1...1...00 -> 111.000011.1
|
|
* read-only, clean, old 00..1...1...01 -> 111.000100.1
|
|
* read-only, clean, young 01..1...0...01 -> 101.000101.1
|
|
* read-only, dirty, old 10..1...1...01 -> 111.000110.1
|
|
* read-only, dirty, young 11..1...0...01 -> 101.000111.1
|
|
* read-write, clean, old 00..1...1...11 -> 111.001100.1
|
|
* read-write, clean, young 01..1...0...11 -> 101.001101.1
|
|
* read-write, dirty, old 10..0...1...11 -> 110.001110.1
|
|
* read-write, dirty, young 11..0...0...11 -> 100.001111.1
|
|
* HW-bits: R read-only, I invalid
|
|
* SW-bits: p present, y young, d dirty, r read, w write, s special,
|
|
* u unused, l large
|
|
*/
|
|
if (present) {
|
|
pteval = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
|
|
pteval |= _PAGE_LARGE | _PAGE_PRESENT;
|
|
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_READ, _PAGE_READ);
|
|
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, _PAGE_WRITE);
|
|
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, _PAGE_INVALID);
|
|
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, _PAGE_PROTECT);
|
|
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, _PAGE_DIRTY);
|
|
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, _PAGE_YOUNG);
|
|
#ifdef CONFIG_MEM_SOFT_DIRTY
|
|
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY);
|
|
#endif
|
|
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC);
|
|
} else if (!none) {
|
|
/* swap rste */
|
|
arch_entry = __rste_to_swp_entry(rste);
|
|
pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry));
|
|
pteval = pte_val(pte);
|
|
} else
|
|
pteval = _PAGE_INVALID;
|
|
return __pte(pteval);
|
|
}
|
|
|
|
static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
|
|
{
|
|
struct folio *folio;
|
|
unsigned long size, paddr;
|
|
|
|
if (!mm_uses_skeys(mm) ||
|
|
rste & _SEGMENT_ENTRY_INVALID)
|
|
return;
|
|
|
|
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
|
|
folio = page_folio(pud_page(__pud(rste)));
|
|
size = PUD_SIZE;
|
|
paddr = rste & PUD_MASK;
|
|
} else {
|
|
folio = page_folio(pmd_page(__pmd(rste)));
|
|
size = PMD_SIZE;
|
|
paddr = rste & PMD_MASK;
|
|
}
|
|
|
|
if (!test_and_set_bit(PG_arch_1, &folio->flags.f))
|
|
__storage_key_init_range(paddr, paddr + size);
|
|
}
|
|
|
|
/*
 * Install a huge pte.
 *
 * @mm:   address space, forwarded to clear_huge_pte_skeys()
 * @addr: not referenced by this function
 * @ptep: slot to write; its current table type bits decide whether the new
 *        value is built as a region-third (2G) or a segment entry
 * @pte:  value to install, given in pte encoding
 *
 * The pte is converted to rste encoding, the large bit matching the table
 * type is added when the pte is present, and the result is stored with
 * set_pte() after clear_huge_pte_skeys() has been called for it.
 */
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t pte)
{
	unsigned long rste = __pte_to_rste(pte);
	int is_region3;

	is_region3 = (pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) ==
		     _REGION_ENTRY_TYPE_R3;

	/* Set correct table type for 2G hugepages */
	if (is_region3)
		rste |= _REGION_ENTRY_TYPE_R3;

	if (likely(pte_present(pte)))
		rste |= is_region3 ? _REGION3_ENTRY_LARGE : _SEGMENT_ENTRY_LARGE;

	clear_huge_pte_skeys(mm, rste);
	set_pte(ptep, __pte(rste));
}
|
|
|
|
/*
 * Wrapper around __set_huge_pte_at(); all arguments except @sz are
 * forwarded unchanged, @sz itself is not used here.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte, unsigned long sz)
{
	/* sz is intentionally not passed on */
	__set_huge_pte_at(mm, addr, ptep, pte);
}
|
|
|
|
/*
 * Read the entry at @ptep and return it converted to pte encoding.
 * @mm and @addr are not referenced by this function.
 */
pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long rste = pte_val(*ptep);

	return __rste_to_pte(rste);
}
|
|
|
|
/*
 * Fetch the huge pte at @ptep and replace the entry with an empty one.
 *
 * The entry is first read via huge_ptep_get(). Depending on its table type
 * bits it is then exchanged with _REGION3_ENTRY_EMPTY through
 * pudp_xchg_direct() or with _SEGMENT_ENTRY_EMPTY through
 * pmdp_xchg_direct().
 *
 * Returns the value obtained from huge_ptep_get() before the exchange.
 */
pte_t __huge_ptep_get_and_clear(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep)
{
	pte_t old_pte = huge_ptep_get(mm, addr, ptep);

	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
		pudp_xchg_direct(mm, addr, (pud_t *)ptep,
				 __pud(_REGION3_ENTRY_EMPTY));
	} else {
		pmdp_xchg_direct(mm, addr, (pmd_t *)ptep,
				 __pmd(_SEGMENT_ENTRY_EMPTY));
	}

	return old_pte;
}
|
|
|
|
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
unsigned long addr, unsigned long sz)
|
|
{
|
|
pgd_t *pgdp;
|
|
p4d_t *p4dp;
|
|
pud_t *pudp;
|
|
pmd_t *pmdp = NULL;
|
|
|
|
pgdp = pgd_offset(mm, addr);
|
|
p4dp = p4d_alloc(mm, pgdp, addr);
|
|
if (p4dp) {
|
|
pudp = pud_alloc(mm, p4dp, addr);
|
|
if (pudp) {
|
|
if (sz == PUD_SIZE)
|
|
return (pte_t *) pudp;
|
|
else if (sz == PMD_SIZE)
|
|
pmdp = pmd_alloc(mm, pudp, addr);
|
|
}
|
|
}
|
|
return (pte_t *) pmdp;
|
|
}
|
|
|
|
pte_t *huge_pte_offset(struct mm_struct *mm,
|
|
unsigned long addr, unsigned long sz)
|
|
{
|
|
pgd_t *pgdp;
|
|
p4d_t *p4dp;
|
|
pud_t *pudp;
|
|
pmd_t *pmdp = NULL;
|
|
|
|
pgdp = pgd_offset(mm, addr);
|
|
if (pgd_present(*pgdp)) {
|
|
p4dp = p4d_offset(pgdp, addr);
|
|
if (p4d_present(*p4dp)) {
|
|
pudp = pud_offset(p4dp, addr);
|
|
if (sz == PUD_SIZE)
|
|
return (pte_t *)pudp;
|
|
if (pud_present(*pudp))
|
|
pmdp = pmd_offset(pudp, addr);
|
|
}
|
|
}
|
|
return (pte_t *) pmdp;
|
|
}
|
|
|
|
/*
 * Report whether @size is a usable huge page size: PMD_SIZE when
 * cpu_has_edat1() is true, PUD_SIZE when cpu_has_edat2() is true.
 * Every other combination returns false.
 */
bool __init arch_hugetlb_valid_size(unsigned long size)
{
	return (cpu_has_edat1() && size == PMD_SIZE) ||
	       (cpu_has_edat2() && size == PUD_SIZE);
}
|