mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-17 03:50:37 +00:00
If a socket has sk->sk_bypass_prot_mem flagged, the socket opts out of the global protocol memory accounting.

Let's control the flag by a new sysctl knob.

The flag is written once during socket(2) and is inherited by child sockets.

Tested with a script that creates local socket pairs and send()s a bunch of data without recv()ing.

Setup:

  # mkdir /sys/fs/cgroup/test
  # echo $$ >> /sys/fs/cgroup/test/cgroup.procs
  # sysctl -q net.ipv4.tcp_mem="1000 1000 1000"
  # ulimit -n 524288

Without net.core.bypass_prot_mem, charged to tcp_mem & memcg:

  # python3 pressure.py &
  # cat /sys/fs/cgroup/test/memory.stat | grep sock
  sock 22642688 <-------------------------------------- charged to memcg
  # cat /proc/net/sockstat| grep TCP
  TCP: inuse 2006 orphan 0 tw 0 alloc 2008 mem 5376 <-- charged to tcp_mem
  # ss -tn | head -n 5
  State Recv-Q Send-Q Local Address:Port Peer Address:Port
  ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:53188
  ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:49972
  ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:53868
  ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:53554
  # nstat | grep Pressure || echo no pressure
  TcpExtTCPMemoryPressures 1 0.0

With net.core.bypass_prot_mem=1, charged to memcg only:

  # sysctl -q net.core.bypass_prot_mem=1
  # python3 pressure.py &
  # cat /sys/fs/cgroup/test/memory.stat | grep sock
  sock 2757468160 <------------------------------------ charged to memcg
  # cat /proc/net/sockstat | grep TCP
  TCP: inuse 2006 orphan 0 tw 0 alloc 2008 mem 0 <- NOT charged to tcp_mem
  # ss -tn | head -n 5
  State Recv-Q Send-Q Local Address:Port Peer Address:Port
  ESTAB 111000 0 127.0.0.1:36019 127.0.0.1:49026
  ESTAB 110000 0 127.0.0.1:36019 127.0.0.1:45630
  ESTAB 110000 0 127.0.0.1:36019 127.0.0.1:44870
  ESTAB 111000 0 127.0.0.1:36019 127.0.0.1:45274
  # nstat | grep Pressure || echo no pressure
  no pressure

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Link: https://patch.msgid.link/20251014235604.3057003-4-kuniyu@google.com
32 lines
592 B
C
32 lines
592 B
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __NETNS_CORE_H__
|
|
#define __NETNS_CORE_H__
|
|
|
|
#include <linux/types.h>
|
|
|
|
struct ctl_table_header;
|
|
struct prot_inuse;
|
|
struct cpumask;
|
|
|
|
struct netns_core {
|
|
/* core sysctls */
|
|
struct ctl_table_header *sysctl_hdr;
|
|
|
|
int sysctl_somaxconn;
|
|
int sysctl_txq_reselection;
|
|
int sysctl_optmem_max;
|
|
u8 sysctl_txrehash;
|
|
u8 sysctl_tstamp_allow_data;
|
|
u8 sysctl_bypass_prot_mem;
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
struct prot_inuse __percpu *prot_inuse;
|
|
#endif
|
|
|
|
#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
|
|
struct cpumask *rps_default_mask;
|
|
#endif
|
|
};
|
|
|
|
#endif
|