slab updates for 6.17
-----BEGIN PGP SIGNATURE-----

iQEzBAABCAAdFiEEe7vIQRWZI0iWSE3xu+CwddJFiJoFAmiHj+8ACgkQu+CwddJF
iJrnOggAjBwzwvJsUWB3YBaF0wyLipLcdNsbbDOqvLYShQifaEuwN/i8FYO+D7a3
DyBR3NK4pWcZtxrSJVHcAuy06yQq5sqeU9Dc5iJ+ADCXnqYshUFp5ARtNVaputGy
b4990JMIG0YxEBD3Gx01kicCdae9JkU5FGZKFk65oHalaGQk7GtMfG+e/obh4z9D
e9R5Ub+9zM9Efwl/DD7qkETWKAq0gBjvbj0dYO0E7ctO/WNr93Z1FsnbxiUcPiG3
ED1LwTuNYYccBf/8iPGy/cp0WcWTwGtjbPEUk3lyY0KcrpgGT+cyvJj8G0GfnvV4
V/OLZrzwVZw2k3MopbFl/RdgWGf0bA==
=gZB5
-----END PGP SIGNATURE-----

Merge tag 'slab-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - Convert struct slab to its own flags instead of referencing page flags,
   which is another preparation step before separating it from struct page
   completely. Along with that, a bunch of documentation fixes and cleanups
   (Matthew Wilcox)

 - Convert large kmalloc to use frozen pages in order to be consistent with
   non-large kmalloc slabs (Vlastimil Babka)

 - MAINTAINERS updates (Matthew Wilcox, Lorenzo Stoakes)

 - Restore NUMA policy support for large kmalloc, broken by mistake in v6.1
   (Vlastimil Babka)

* tag 'slab-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  MAINTAINERS: add missing files to slab section
  slab: Update MAINTAINERS entry
  memcg_slabinfo: Fix use of PG_slab
  kfence: Remove mention of PG_slab
  vmcoreinfo: Remove documentation of PG_slab and PG_hugetlb
  doc: Add slab internal kernel-doc
  slub: Fix a documentation build error for krealloc()
  slab: Add SL_pfmemalloc flag
  slab: Add SL_partial flag
  slab: Rename slab->__page_flags to slab->flags
  doc: Move SLUB documentation to the admin guide
  mm, slab: use frozen pages for large kmalloc
  mm, slab: restore NUMA policy support for large kmalloc
commit e8d780dcd9
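As a purely illustrative reading of the first bullet above: slab state that used to be expressed by reinterpreting page/folio flag helpers is now kept in slab->flags and tested with plain bit operations. The helper name below is made up; the real accessors appear in the mm/slub.c hunks further down.

/*
 * Sketch only: SL_partial and slab->flags come from this series,
 * the function name here is hypothetical.
 */
static inline bool example_slab_on_node_partial(const struct slab *slab)
{
	/* previously: folio_test_workingset(slab_folio(slab)) */
	return test_bit(SL_partial, &slab->flags);
}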
@@ -37,7 +37,8 @@ Description:
 		The alloc_calls file is read-only and lists the kernel code
 		locations from which allocations for this cache were performed.
 		The alloc_calls file only contains information if debugging is
-		enabled for that cache (see Documentation/mm/slub.rst).
+		enabled for that cache (see
+		Documentation/admin-guide/mm/slab.rst).
 
 What:		/sys/kernel/slab/<cache>/alloc_fastpath
 Date:		February 2008
@@ -219,7 +220,7 @@ Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
 Description:
 		The free_calls file is read-only and lists the locations of
 		object frees if slab debugging is enabled (see
-		Documentation/mm/slub.rst).
+		Documentation/admin-guide/mm/slab.rst).
 
 What:		/sys/kernel/slab/<cache>/free_fastpath
 Date:		February 2008
@@ -325,14 +325,14 @@ NR_FREE_PAGES
 On linux-2.6.21 or later, the number of free pages is in
 vm_stat[NR_FREE_PAGES]. Used to get the number of free pages.
 
-PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask|PG_hugetlb
------------------------------------------------------------------------------------------
+PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_hwpoison|PG_head_mask
+--------------------------------------------------------------------------
 
 Page attributes. These flags are used to filter various unnecessary for
 dumping pages.
 
-PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_unaccepted)
--------------------------------------------------------------------------------------------------------------------------
+PAGE_SLAB_MAPCOUNT_VALUE|PAGE_BUDDY_MAPCOUNT_VALUE|PAGE_OFFLINE_MAPCOUNT_VALUE|PAGE_HUGETLB_MAPCOUNT_VALUE|PAGE_UNACCEPTED_MAPCOUNT_VALUE
+------------------------------------------------------------------------------------------------------------------------------------------
 
 More page attributes. These flags are used to filter various unnecessary for
 dumping pages.
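The PG_slab removal above reflects that "being a slab page" is no longer a page flag at all: it is a page type stored in the upper byte of page->page_type, which is also what the memcg_slabinfo drgn script at the end of this diff now checks. A rough sketch of that test follows; the helper name is hypothetical, and real code should use the existing PageSlab()/folio_test_slab() helpers rather than open-coding this.

#include <linux/mm_types.h>
#include <linux/page-flags.h>

/* Sketch only: page types such as PGTY_slab live in bits 31..24 of page_type. */
static inline bool example_page_is_slab(const struct page *page)
{
	return (page->page_type >> 24) == PGTY_slab;
}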
@@ -6587,14 +6587,14 @@
 			slab_debug can create guard zones around objects and
 			may poison objects when not in use. Also tracks the
 			last alloc / free. For more information see
-			Documentation/mm/slub.rst.
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_debug legacy name also accepted for now)
 
 	slab_max_order=	[MM]
 			Determines the maximum allowed order for slabs.
 			A high setting may cause OOMs due to memory
 			fragmentation. For more information see
-			Documentation/mm/slub.rst.
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_max_order legacy name also accepted for now)
 
 	slab_merge	[MM]
@@ -6609,13 +6609,14 @@
 			the number of objects indicated. The higher the number
 			of objects the smaller the overhead of tracking slabs
 			and the less frequently locks need to be acquired.
-			For more information see Documentation/mm/slub.rst.
+			For more information see
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_min_objects legacy name also accepted for now)
 
 	slab_min_order=	[MM]
 			Determines the minimum page order for slabs. Must be
 			lower or equal to slab_max_order. For more information see
-			Documentation/mm/slub.rst.
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_min_order legacy name also accepted for now)
 
 	slab_nomerge	[MM]
@@ -6629,7 +6630,8 @@
 			cache (risks via metadata attacks are mostly
 			unchanged). Debug options disable merging on their
 			own.
-			For more information see Documentation/mm/slub.rst.
+			For more information see
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_nomerge legacy name also accepted for now)
 
 	slab_strict_numa	[MM]
@@ -37,6 +37,7 @@ the Linux memory management.
    numaperf
    pagemap
    shrinker_debugfs
+   slab
    soft-dirty
    swap_numa
    transhuge
@@ -1,13 +1,12 @@
-==========================
-Short users guide for SLUB
-==========================
+========================================
+Short users guide for the slab allocator
+========================================
 
-The basic philosophy of SLUB is very different from SLAB. SLAB
-requires rebuilding the kernel to activate debug options for all
-slab caches. SLUB always includes full debugging but it is off by default.
-SLUB can enable debugging only for selected slabs in order to avoid
-an impact on overall system performance which may make a bug more
-difficult to find.
+The slab allocator includes full debugging support (when built with
+CONFIG_SLUB_DEBUG=y) but it is off by default (unless built with
+CONFIG_SLUB_DEBUG_ON=y). You can enable debugging only for selected
+slabs in order to avoid an impact on overall system performance which
+may make a bug more difficult to find.
 
 In order to switch debugging on one can add an option ``slab_debug``
 to the kernel command line. That will enable full debugging for
@@ -56,7 +56,6 @@ documentation, or deleted if it has served its purpose.
    page_owner
    page_table_check
    remap_file_pages
-   slub
    split_page_table_lock
    transhuge
    unevictable-lru
@@ -3,3 +3,10 @@
 ===============
 Slab Allocation
 ===============
+
+Functions and structures
+========================
+
+.. kernel-doc:: mm/slab.h
+.. kernel-doc:: mm/slub.c
+   :internal:
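For readers unfamiliar with the two directives just added: they pull kernel-doc comments out of mm/slab.h and mm/slub.c into this document (the :internal: option also includes documentation for symbols that are not exported). A minimal, hypothetical example of the comment format they consume:

/**
 * example_helper() - One-line summary shown in the generated document.
 * @nr_objects: Each parameter is described with an @name: line.
 *
 * Free-form description. Comments written like this in mm/slab.h and
 * mm/slub.c are what the kernel-doc directives above extract.
 *
 * Return: Conventionally documented in a "Return:" section.
 */
static unsigned int example_helper(unsigned int nr_objects)
{
	return nr_objects * 2;
}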
MAINTAINERS
@@ -23015,17 +23015,24 @@ F:	Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml
 F:	drivers/nvmem/layouts/sl28vpd.c
 
 SLAB ALLOCATOR
-M:	Christoph Lameter <cl@gentwo.org>
-M:	David Rientjes <rientjes@google.com>
-M:	Andrew Morton <akpm@linux-foundation.org>
 M:	Vlastimil Babka <vbabka@suse.cz>
+M:	Andrew Morton <akpm@linux-foundation.org>
+R:	Christoph Lameter <cl@gentwo.org>
+R:	David Rientjes <rientjes@google.com>
 R:	Roman Gushchin <roman.gushchin@linux.dev>
 R:	Harry Yoo <harry.yoo@oracle.com>
 L:	linux-mm@kvack.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
-F:	include/linux/sl?b*.h
-F:	mm/sl?b*
+F:	Documentation/admin-guide/mm/slab.rst
+F:	Documentation/mm/slab.rst
+F:	include/linux/mempool.h
+F:	include/linux/slab.h
+F:	mm/failslab.c
+F:	mm/mempool.c
+F:	mm/slab.h
+F:	mm/slab_common.c
+F:	mm/slub.c
 
 SLCAN CAN NETWORK DRIVER
 M:	Dario Binacchi <dario.binacchi@amarulasolutions.com>
@@ -1325,6 +1325,8 @@ static inline void get_page(struct page *page)
 	struct folio *folio = page_folio(page);
 	if (WARN_ON_ONCE(folio_test_slab(folio)))
 		return;
+	if (WARN_ON_ONCE(folio_test_large_kmalloc(folio)))
+		return;
 	folio_get(folio);
 }
 
@@ -1419,7 +1421,7 @@ static inline void put_page(struct page *page)
 {
 	struct folio *folio = page_folio(page);
 
-	if (folio_test_slab(folio))
+	if (folio_test_slab(folio) || folio_test_large_kmalloc(folio))
 		return;
 
 	folio_put(folio);
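Why get_page()/put_page() must now bail out on large kmalloc folios: with the frozen-pages conversion further down, large kmalloc memory no longer uses the folio refcount for its lifetime (kfree() releases it via free_frozen_pages()), so taking or dropping a reference on it would be a bug. A hedged sketch of the resulting invariant, using a made-up helper name and the folio_test_large_kmalloc() test seen above:

/*
 * Illustrative only: generic code must not treat slab or large-kmalloc
 * folios as refcountable memory.
 */
static inline bool example_page_ref_usable(struct page *page)
{
	struct folio *folio = page_folio(page);

	return !folio_test_slab(folio) && !folio_test_large_kmalloc(folio);
}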
@@ -605,8 +605,8 @@ static unsigned long kfence_init_pool(void)
 	pages = virt_to_page(__kfence_pool);
 
 	/*
-	 * Set up object pages: they must have PG_slab set, to avoid freeing
-	 * these as real pages.
+	 * Set up object pages: they must have PGTY_slab set to avoid freeing
+	 * them as real pages.
 	 *
 	 * We also want to avoid inserting kfence_free() in the kfree()
 	 * fast-path in SLUB, and therefore need to ensure kfree() correctly
mm/slab.h
@@ -50,7 +50,7 @@ typedef union {
 
 /* Reuses the bits in struct page */
 struct slab {
-	unsigned long __page_flags;
+	unsigned long flags;
 
 	struct kmem_cache *slab_cache;
 	union {
@@ -99,7 +99,7 @@
 
 #define SLAB_MATCH(pg, sl) \
 	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
-SLAB_MATCH(flags, __page_flags);
+SLAB_MATCH(flags, flags);
 SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
 SLAB_MATCH(_refcount, __page_refcount);
 #ifdef CONFIG_MEMCG
@@ -167,30 +167,6 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
  */
 #define slab_page(s) folio_page(slab_folio(s), 0)
 
-/*
- * If network-based swap is enabled, sl*b must keep track of whether pages
- * were allocated from pfmemalloc reserves.
- */
-static inline bool slab_test_pfmemalloc(const struct slab *slab)
-{
-	return folio_test_active(slab_folio(slab));
-}
-
-static inline void slab_set_pfmemalloc(struct slab *slab)
-{
-	folio_set_active(slab_folio(slab));
-}
-
-static inline void slab_clear_pfmemalloc(struct slab *slab)
-{
-	folio_clear_active(slab_folio(slab));
-}
-
-static inline void __slab_clear_pfmemalloc(struct slab *slab)
-{
-	__folio_clear_active(slab_folio(slab));
-}
-
 static inline void *slab_address(const struct slab *slab)
 {
 	return folio_address(slab_folio(slab));
mm/slub.c
@@ -91,14 +91,14 @@
  * The partially empty slabs cached on the CPU partial list are used
  * for performance reasons, which speeds up the allocation process.
  * These slabs are not frozen, but are also exempt from list management,
- * by clearing the PG_workingset flag when moving out of the node
+ * by clearing the SL_partial flag when moving out of the node
  * partial list. Please see __slab_free() for more details.
  *
  * To sum up, the current scheme is:
- * - node partial slab: PG_Workingset && !frozen
- * - cpu partial slab:  !PG_Workingset && !frozen
- * - cpu slab:          !PG_Workingset && frozen
- * - full slab:         !PG_Workingset && !frozen
+ * - node partial slab: SL_partial && !frozen
+ * - cpu partial slab:  !SL_partial && !frozen
+ * - cpu slab:          !SL_partial && frozen
+ * - full slab:         !SL_partial && !frozen
  *
  * list_lock
  *
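To make the table above concrete: only two bits of state are involved, and a cpu partial slab is indistinguishable from a full slab by those bits alone (both are !SL_partial && !frozen); the allocator tells them apart by context, e.g. whether the slab sits on a per-CPU partial list. A purely illustrative sketch (hypothetical enum and helper; 'frozen' is passed in rather than read from the slab):

enum example_slab_place { EX_NODE_PARTIAL, EX_CPU_SLAB, EX_CPU_PARTIAL_OR_FULL };

static enum example_slab_place example_classify(const struct slab *slab, bool frozen)
{
	if (test_bit(SL_partial, &slab->flags))
		return EX_NODE_PARTIAL;		/* on a node's partial list */
	return frozen ? EX_CPU_SLAB : EX_CPU_PARTIAL_OR_FULL;
}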
@@ -183,6 +183,22 @@
  * the fast path and disables lockless freelists.
  */
 
+/**
+ * enum slab_flags - How the slab flags bits are used.
+ * @SL_locked: Is locked with slab_lock()
+ * @SL_partial: On the per-node partial list
+ * @SL_pfmemalloc: Was allocated from PF_MEMALLOC reserves
+ *
+ * The slab flags share space with the page flags but some bits have
+ * different interpretations. The high bits are used for information
+ * like zone/node/section.
+ */
+enum slab_flags {
+	SL_locked = PG_locked,
+	SL_partial = PG_workingset,	/* Historical reasons for this bit */
+	SL_pfmemalloc = PG_active,	/* Historical reasons for this bit */
+};
+
 /*
  * We could simply use migrate_disable()/enable() but as long as it's a
  * function call even on !PREEMPT_RT, use inline preempt_disable() there.
@@ -634,17 +650,36 @@ static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
 }
 #endif /* CONFIG_SLUB_CPU_PARTIAL */
 
+/*
+ * If network-based swap is enabled, slub must keep track of whether memory
+ * were allocated from pfmemalloc reserves.
+ */
+static inline bool slab_test_pfmemalloc(const struct slab *slab)
+{
+	return test_bit(SL_pfmemalloc, &slab->flags);
+}
+
+static inline void slab_set_pfmemalloc(struct slab *slab)
+{
+	set_bit(SL_pfmemalloc, &slab->flags);
+}
+
+static inline void __slab_clear_pfmemalloc(struct slab *slab)
+{
+	__clear_bit(SL_pfmemalloc, &slab->flags);
+}
+
 /*
  * Per slab locking using the pagelock
  */
 static __always_inline void slab_lock(struct slab *slab)
 {
-	bit_spin_lock(PG_locked, &slab->__page_flags);
+	bit_spin_lock(SL_locked, &slab->flags);
 }
 
 static __always_inline void slab_unlock(struct slab *slab)
 {
-	bit_spin_unlock(PG_locked, &slab->__page_flags);
+	bit_spin_unlock(SL_locked, &slab->flags);
 }
 
 static inline bool
@@ -1010,7 +1045,7 @@ static void print_slab_info(const struct slab *slab)
 {
 	pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
 	       slab, slab->objects, slab->inuse, slab->freelist,
-	       &slab->__page_flags);
+	       &slab->flags);
 }
 
 void skip_orig_size_check(struct kmem_cache *s, const void *object)
@@ -2717,23 +2752,19 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab)
 	free_slab(s, slab);
 }
 
-/*
- * SLUB reuses PG_workingset bit to keep track of whether it's on
- * the per-node partial list.
- */
 static inline bool slab_test_node_partial(const struct slab *slab)
 {
-	return folio_test_workingset(slab_folio(slab));
+	return test_bit(SL_partial, &slab->flags);
 }
 
 static inline void slab_set_node_partial(struct slab *slab)
 {
-	set_bit(PG_workingset, folio_flags(slab_folio(slab), 0));
+	set_bit(SL_partial, &slab->flags);
 }
 
 static inline void slab_clear_node_partial(struct slab *slab)
 {
-	clear_bit(PG_workingset, folio_flags(slab_folio(slab), 0));
+	clear_bit(SL_partial, &slab->flags);
 }
 
 /*
@@ -4269,7 +4300,12 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
 		flags = kmalloc_fix_flags(flags);
 
 	flags |= __GFP_COMP;
-	folio = (struct folio *)alloc_pages_node_noprof(node, flags, order);
+
+	if (node == NUMA_NO_NODE)
+		folio = (struct folio *)alloc_frozen_pages_noprof(flags, order);
+	else
+		folio = (struct folio *)__alloc_frozen_pages_noprof(flags, order, node, NULL);
+
 	if (folio) {
 		ptr = folio_address(folio);
 		lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
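Two things happen in the hunk above: the folio is now allocated frozen (its refcount is unused, matching the get_page()/put_page() and free_large_kmalloc() changes), and the NUMA_NO_NODE case goes through the mempolicy-aware allocation path again, which is the "restore NUMA policy support for large kmalloc" part of the series. The general shape of that node handling, as a hedged sketch with a hypothetical wrapper and the ordinary (non-frozen) page API:

/*
 * Sketch only: honour the caller's NUMA mempolicy unless an explicit
 * node was requested.
 */
static struct page *example_alloc(gfp_t gfp, unsigned int order, int node)
{
	if (node == NUMA_NO_NODE)
		return alloc_pages(gfp, order);		/* mempolicy-aware path */
	return alloc_pages_node(node, gfp, order);	/* explicit node, no mempolicy */
}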
@@ -4765,7 +4801,7 @@ static void free_large_kmalloc(struct folio *folio, void *object)
 	lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
 			      -(PAGE_SIZE << order));
 	__folio_clear_large_kmalloc(folio);
-	folio_put(folio);
+	free_frozen_pages(&folio->page, order);
 }
 
 /*
@@ -4930,12 +4966,12 @@ alloc_new:
 * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
 * size of an allocation (but not the exact size it was allocated with) and
 * hence implements the following semantics for shrinking and growing buffers
- * with __GFP_ZERO.
+ * with __GFP_ZERO::
 *
- *         new bucket
- * 0       size    size
- * |--------|----------------|
- * |  keep  |      zero      |
+ *	           new bucket
+ *	0       size    size
+ *	|--------|----------------|
+ *	|  keep  |      zero      |
 *
 * Otherwise, the original allocation size 'orig_size' could be used to
 * precisely clear the requested size, and the new size will also be stored
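A short usage sketch of the semantics the comment above documents (the function and variable names are made up, and error handling is trimmed to the essentials): growing a __GFP_ZERO allocation keeps the previously usable bytes and hands back the newly exposed tail zeroed, as the diagram shows.

/* Hypothetical example, assuming <linux/slab.h>. */
static int example_grow(void **bufp, size_t new_len)
{
	void *tmp = krealloc(*bufp, new_len, GFP_KERNEL | __GFP_ZERO);

	if (!tmp)
		return -ENOMEM;		/* the old buffer is left untouched */
	*bufp = tmp;			/* newly exposed bytes read as zero */
	return 0;
}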
@@ -146,11 +146,11 @@ def detect_kernel_config():
 
 
 def for_each_slab(prog):
-    PGSlab = ~prog.constant('PG_slab')
+    slabtype = prog.constant('PGTY_slab')
 
     for page in for_each_page(prog):
         try:
-            if page.page_type.value_() == PGSlab:
+            if (page.page_type.value_() >> 24) == slabtype:
                 yield cast('struct slab *', page)
         except FaultError:
             pass