1
0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2026-01-11 17:10:13 +00:00

Compare commits

...

19 Commits

Author SHA1 Message Date
Linus Torvalds
ea1013c153 bpf-fixes
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmlCBmwACgkQ6rmadz2v
 bToUZA//ZY0IE1x1nCixEAqGF/nGpDzVX4YQQfjrUoXQOD4ykzt35yTNXl6B1IVA
 dliVSI6kUtdoThUa7xJUxMSkDsVBsEMT/zYXQEXJG1zXvJANCB9wTzsC3OCBWbXt
 BRczcEkq0OHC9/l5CrILR6ocwxKGDIMIysfeOSABgfqckSEhylWy3+EWZQCk08ka
 gNpXlDJUG7dYpcZD/zhuC7e5Rg1uNvN7WiTv+Biig8xZCsEtYOq+qC5C/sOnsypI
 nqfECfbx48cVl49SjatdgquuHn/INESdLRCHisshkurA2Mp5PQuCmrwlXbv4JG59
 v9b7lsFQlkpvEXMdo9VYe6K2gjfkOPRdWsVPu2oXA1qISRmrDqX8cKOpapUIwRhL
 p3ASruMOnz0KFqVaET8+5u2SwtALeW+c+1p1aHMfVGF/qbXuyG05qBkLoGFJR+Xr
 WznXUXY80Z7pjD57SpA6U3DigAkGqKCBXUwdifaOq8HQonwsnQGqkW/3NngNULGP
 IC4u0JXn61VgQsM/kAw+ucc4bdKI0g4oKJR56lT48elrj6Yxrjpde4oOqzZ0IQKu
 VQ0YnzWqqT2tjh4YNMOwkNPbFR4ALd329zI6TUkWib/jByEBNcfjSj9BRANd1KSx
 JgSHAE6agrbl6h3nOx584YCasX3Zq+nfv1Sj4Z/5GaHKKW3q/Vw=
 =wHLt
 -----END PGP SIGNATURE-----

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

 - Fix BPF builds due to -fms-extensions. selftests (Alexei
   Starovoitov), bpftool (Quentin Monnet).

 - Fix build of net/smc when CONFIG_BPF_SYSCALL=y, but CONFIG_BPF_JIT=n
   (Geert Uytterhoeven)

 - Fix livepatch/BPF interaction and support reliable unwinding through
   BPF stack frames (Josh Poimboeuf)

 - Do not audit capability check in arm64 JIT (Ondrej Mosnacek)

 - Fix truncated dmabuf BPF iterator reads (T.J. Mercier)

 - Fix verifier assumptions of bpf_d_path's output buffer (Shuran Liu)

 - Fix warnings in libbpf when built with -Wdiscarded-qualifiers under
   C23 (Mikhail Gavrilov)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: add regression test for bpf_d_path()
  bpf: Fix verifier assumptions of bpf_d_path's output buffer
  selftests/bpf: Add test for truncated dmabuf_iter reads
  bpf: Fix truncated dmabuf iterator reads
  x86/unwind/orc: Support reliable unwinding through BPF stack frames
  bpf: Add bpf_has_frame_pointer()
  bpf, arm64: Do not audit capability check in do_jit()
  libbpf: Fix -Wdiscarded-qualifiers under C23
  bpftool: Fix build warnings due to MS extensions
  net: smc: SMC_HS_CTRL_BPF should depend on BPF_JIT
  selftests/bpf: Add -fms-extensions to bpf build flags
2025-12-17 15:54:58 +12:00
Linus Torvalds
64e68f8a95 s390 fixes for 6.19-rc2
- clear 'Search boot program' flag when 'bootprog' sysfs file is
   written to override a value set from Hardware Management Console
 
 - fix cyclic dead-lock in zpci_zdev_put() and zpci_scan_devices()
   functions when triggering PCI device recovery using sysfs
 
 - annotate the expected lock context imbalance in zpci_release_device()
   function to fix a sparse complaint
 
 - fix the logic to fallback to the return address register value in
   the topmost frame when stack tracing uses a back chain
 -----BEGIN PGP SIGNATURE-----
 
 iI0EABYKADUWIQQrtrZiYVkVzKQcYivNdxKlNrRb8AUCaUFNXRccYWdvcmRlZXZA
 bGludXguaWJtLmNvbQAKCRDNdxKlNrRb8JpYAQC4mM0ZoTUp+c6rVjgPsDLbhFMm
 HX6PGcTCxwAirSQSqwD+LrC216fNcG5gsfRU4NdMzVxAs11DnRcnbjAx8/3tawg=
 =Q3D2
 -----END PGP SIGNATURE-----

Merge tag 's390-6.19-3' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 fixes from Alexander Gordeev:

 - clear 'Search boot program' flag when 'bootprog' sysfs file is
   written to override a value set from Hardware Management Console

 - fix cyclic dead-lock in zpci_zdev_put() and zpci_scan_devices()
   functions when triggering PCI device recovery using sysfs

 - annotate the expected lock context imbalance in zpci_release_device()
   function to fix a sparse complaint

 - fix the logic to fallback to the return address register value in the
   topmost frame when stack tracing uses a back chain

* tag 's390-6.19-3' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/stacktrace: Do not fallback to RA register
  s390/pci: Annotate lock context imbalance in zpci_release_device()
  s390/pci: Fix cyclic dead-lock in zpci_zdev_put() and zpci_scan_devices()
  s390/ipl: Clear SBP flag when bootprog is set
2025-12-17 15:48:30 +12:00
Jens Remus
489e96651d s390/stacktrace: Do not fallback to RA register
The logic to fallback to the return address (RA) register value in
the topmost frame when stack tracing using back chain is broken in
multiple ways:

When assuming the RA register 14 has not been saved yet one must assume
that a new user stack frame has not been allocated either.  Therefore
the back chain would not contain the stack pointer (SP) at entry, but
the caller's SP at its entry instead.

Therefore when falling back to the RA register 14 value it would also be
necessary to fallback to the SP register 15 value.  Otherwise an invalid
combination of RA register 14 and caller's SP at its entry (from the
back chain) is used.

In the topmost frame the back chain contains either the caller's SP at
its entry (before having allocated a new stack frame in the prologue),
the SP at entry (after having allocated a new stack frame), or an
uninitialized value (during static/dynamic stack allocation).  In both
cases where the back chain is valid either the caller or prologue must
have saved its respective RA to the respective frame.  Therefore, if the
RA obtained from the frame pointed to by the back chain is invalid, this
does not indicate that the IP in the topmost frame is still early in the
prologue and the RA has not been saved.

Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2025-12-14 11:03:58 +01:00
Benjamin Block
af241e6bfc s390/pci: Annotate lock context imbalance in zpci_release_device()
When checking `arch/s390/pci/pci.c` with `sparse` during build, the
following complaint is reported:

  arch/s390/pci/pci.c: note: in included file (through include/linux/smp.h, include/linux/lockdep.h, include/linux/spinlock.h, include/linux/mmzone.h, include/linux/gfp.h, include/linux/slab.h):
  ./include/linux/list.h:237:25: warning: context imbalance in 'zpci_release_device' - unexpected unlock

But this is expected, as zpci_release_device() is expected to be called
with `zpci_list_lock` held, as part of `kref_put_lock()` or similar.

Reflect this by annotating the function with the appropriate
__releases().

Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Farhan Ali <alifm@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2025-12-14 11:03:58 +01:00
Benjamin Block
4cb92fa763 s390/pci: Fix cyclic dead-lock in zpci_zdev_put() and zpci_scan_devices()
When triggering PCI device recovery by writing into the SysFS attribute
`recover` of a Physical Function with existing child SR-IOV Virtual
Functions, lockdep is reporting a possible deadlock between three
threads:

         Thread (A)             Thread (B)             Thread (C)
             |                      |                      |
      recover_store()      zpci_scan_devices()    zpci_scan_devices()
lock(pci_rescan_remove_lock)        |                      |
             |                      |                      |
             |                      |            zpci_bus_scan_busses()
             |                      |             lock(zbus_list_lock)
             |              zpci_add_device()              |
             |          lock(zpci_add_remove_lock)         |
             |                      |                      ┴
             |                      |             zpci_bus_scan_bus()
             |                      |         lock(pci_rescan_remove_lock)
             ┴                      |
      zpci_zdev_put()               |
 lock(zpci_add_remove_lock)         |
                                    ┴
                              zpci_bus_get()
                           lock(zbus_list_lock)

In zpci_bus_scan_busses() the `zbus_list_lock` is taken for the whole
duration of the function, which also includes taking
`pci_rescan_remove_lock`, among other things. But `zbus_list_lock` only
really needs to protect the modification of the global registration
`zbus_list`, it can be dropped while the functions within the list
iteration run; this way we break the cycle above.

Break up zpci_bus_scan_busses() into an "iterator" zpci_bus_get_next()
that iterates over `zbus_list` element by element, and acquires and
releases `zbus_list_lock` as necessary, but never keep holding it.
References to `zpci_bus` objects are also acquired and released.

The reference counting on `zpci_bus` objects is also changed so that all
put() and get() operations are done under the protection of
`zbus_list_lock`, and if the operation results in a modification of
`zpci_bus_list`, this modification is done in the same critical section
(apart the very first initialization). This way objects are never seen
on the list that are about to be released and/or half-initialized.

Fixes: 14c87ba8123a ("s390/pci: separate zbus registration from scanning")
Suggested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2025-12-14 11:03:58 +01:00
Sven Schnelle
b1aa01d312 s390/ipl: Clear SBP flag when bootprog is set
With z16 a new flag 'search boot program' was introduced for
list-directed IPL (SCSI, NVMe, ECKD DASD). If this flag is set,
e.g. via selecting the "Automatic" value for the "Boot program
selector" control on an HMC load panel, it is copied to the reipl
structure from the initial ipl structure. When a user now sets a
boot prog via sysfs, the flag is not cleared and the bootloader
will again automatically select the boot program, ignoring user
configuration.

To avoid that, clear the SBP flag when a bootprog sysfs file is
written.

Cc: stable@vger.kernel.org
Reviewed-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2025-12-14 11:03:57 +01:00
Alexei Starovoitov
1d528e794f Merge branch 'bpf-fix-bpf_d_path-helper-prototype'
Shuran Liu says:

====================
bpf: fix bpf_d_path() helper prototype

Hi,

This series fixes a verifier issue with bpf_d_path() and adds a
regression test to cover its use within a hook function.

Patch 1 updates the bpf_d_path() helper prototype so that the second
argument is marked as MEM_WRITE. This makes it explicit to the verifier
that the helper writes into the provided buffer.

Patch 2 extends the existing d_path selftest to cover incorrect verifier
assumptions caused by an incorrect function prototype. The test program calls
bpf_d_path() and checks if the first character of the path can be read.
It ensures the verifier does not assume the buffer remains unwritten.

Changelog
=========

v5:
  - Moved the temporary file for the fallocate test from /tmp to /dev/shm
    Since bpf CI's 9P filesystem under /tmp does not support fallocate.

v4:
  - Use the fallocate hook instead of an LSM hook to simplify the selftest,
    as suggested by Matt and Alexei.
  - Add a utility function in test_d_path.c to load the BPF program,
    improving code reuse.

v3:
  - Switch the pathname prefix loop to use bpf_for() instead of
    #pragma unroll, as suggested by Matt.
  - Remove /tmp/bpf_d_path_test in the test cleanup path.
  - Add the missing Reviewed-by tags.

v2:
  - Merge the new test into the existing d_path selftest rather than
  creating new files.
  - Add PID filtering in the LSM program to avoid nondeterministic failures
  due to unrelated processes triggering bprm_check_security.
  - Synchronize child execution using a pipe to ensure deterministic
  updates to the PID.

Thanks for your time and reviews.
====================

Link: https://patch.msgid.link/20251206141210.3148-1-electronlsr@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-10 01:36:26 -08:00
Shuran Liu
79e247d660 selftests/bpf: add regression test for bpf_d_path()
Add a regression test for bpf_d_path() to cover incorrect verifier
assumptions caused by an incorrect function prototype. The test
attaches to the fallocate hook, calls bpf_d_path() and verifies that
a simple prefix comparison on the returned pathname behaves correctly
after the fix in patch 1. It ensures the verifier does not assume
the buffer remains unwritten.

Co-developed-by: Zesen Liu <ftyg@live.com>
Signed-off-by: Zesen Liu <ftyg@live.com>
Co-developed-by: Peili Gao <gplhust955@gmail.com>
Signed-off-by: Peili Gao <gplhust955@gmail.com>
Co-developed-by: Haoran Ni <haoran.ni.cs@gmail.com>
Signed-off-by: Haoran Ni <haoran.ni.cs@gmail.com>
Signed-off-by: Shuran Liu <electronlsr@gmail.com>
Link: https://lore.kernel.org/r/20251206141210.3148-3-electronlsr@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-10 01:36:26 -08:00
Shuran Liu
ac44dcc788 bpf: Fix verifier assumptions of bpf_d_path's output buffer
Commit 37cce22dbd51 ("bpf: verifier: Refactor helper access type
tracking") started distinguishing read vs write accesses performed by
helpers.

The second argument of bpf_d_path() is a pointer to a buffer that the
helper fills with the resulting path. However, its prototype currently
uses ARG_PTR_TO_MEM without MEM_WRITE.

Before 37cce22dbd51, helper accesses were conservatively treated as
potential writes, so this mismatch did not cause issues. Since that
commit, the verifier may incorrectly assume that the buffer contents
are unchanged across the helper call and base its optimizations on this
wrong assumption. This can lead to misbehaviour in BPF programs that
read back the buffer, such as prefix comparisons on the returned path.

Fix this by marking the second argument of bpf_d_path() as
ARG_PTR_TO_MEM | MEM_WRITE so that the verifier correctly models the
write to the caller-provided buffer.

Fixes: 37cce22dbd51 ("bpf: verifier: Refactor helper access type tracking")
Co-developed-by: Zesen Liu <ftyg@live.com>
Signed-off-by: Zesen Liu <ftyg@live.com>
Co-developed-by: Peili Gao <gplhust955@gmail.com>
Signed-off-by: Peili Gao <gplhust955@gmail.com>
Co-developed-by: Haoran Ni <haoran.ni.cs@gmail.com>
Signed-off-by: Haoran Ni <haoran.ni.cs@gmail.com>
Signed-off-by: Shuran Liu <electronlsr@gmail.com>
Reviewed-by: Matt Bobrowski <mattbobrowski@google.com>
Link: https://lore.kernel.org/r/20251206141210.3148-2-electronlsr@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-10 01:34:04 -08:00
T.J. Mercier
9489d457d4 selftests/bpf: Add test for truncated dmabuf_iter reads
If many dmabufs are present, reads of the dmabuf iterator can be
truncated at PAGE_SIZE or user buffer size boundaries before the fix in
"bpf: Fix truncated dmabuf iterator reads". Add a test to
confirm truncation does not occur.

Signed-off-by: T.J. Mercier <tjmercier@google.com>
Link: https://lore.kernel.org/r/20251204000348.1413593-2-tjmercier@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-09 23:49:04 -08:00
T.J. Mercier
234483565d bpf: Fix truncated dmabuf iterator reads
If there is a large number (hundreds) of dmabufs allocated, the text
output generated from dmabuf_iter_seq_show can exceed common user buffer
sizes (e.g. PAGE_SIZE) necessitating multiple start/stop cycles to
iterate through all dmabufs. However the dmabuf iterator currently
returns NULL in dmabuf_iter_seq_start for all non-zero pos values, which
results in the truncation of the output before all dmabufs are handled.

After dma_buf_iter_begin / dma_buf_iter_next, the refcount of the buffer
is elevated so that the BPF iterator program can run without holding any
locks. When a stop occurs, instead of immediately dropping the reference
on the buffer, stash a pointer to the buffer in seq->priv until
either start is called or the iterator is released. This also enables
the resumption of iteration without first walking through the list of
dmabufs based on the pos value.

Fixes: 76ea95534995 ("bpf: Add dmabuf iterator")
Signed-off-by: T.J. Mercier <tjmercier@google.com>
Link: https://lore.kernel.org/r/20251204000348.1413593-1-tjmercier@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-09 23:48:34 -08:00
Alexei Starovoitov
297c3fba9d Merge branch 'bpf-x86-unwind-orc-support-reliable-unwinding-through-bpf-stack-frames'
Josh Poimboeuf says:

====================
bpf, x86/unwind/orc: Support reliable unwinding through BPF stack frames

Fix livepatch stalls which may be seen when a task is blocked with BPF
JIT on its kernel stack.

Changes since v1 (https://lore.kernel.org/cover.1764699074.git.jpoimboe@kernel.org):
- fix NULL ptr deref in __arch_prepare_bpf_trampoline()
====================

Link: https://patch.msgid.link/cover.1764818927.git.jpoimboe@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-09 23:30:27 -08:00
Josh Poimboeuf
01bc3b6db1 x86/unwind/orc: Support reliable unwinding through BPF stack frames
BPF JIT programs and trampolines use a frame pointer, so the current ORC
unwinder strategy of falling back to frame pointers (when an ORC entry
is missing) usually works in practice when unwinding through BPF JIT
stack frames.

However, that frame pointer fallback is just a guess, so the unwind gets
marked unreliable for live patching, which can cause livepatch
transition stalls.

Make the common case reliable by calling the bpf_has_frame_pointer()
helper to detect the valid frame pointer region of BPF JIT programs and
trampolines.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Reported-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Closes: https://lore.kernel.org/0e555733-c670-4e84-b2e6-abb8b84ade38@crowdstrike.com
Acked-by: Song Liu <song@kernel.org>
Acked-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Link: https://lore.kernel.org/r/a18505975662328c8ffb1090dded890c6f8c1004.1764818927.git.jpoimboe@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
2025-12-09 23:30:04 -08:00
Josh Poimboeuf
ca45c84afb bpf: Add bpf_has_frame_pointer()
Introduce a bpf_has_frame_pointer() helper that unwinders can call to
determine whether a given instruction pointer is within the valid frame
pointer region of a BPF JIT program or trampoline (i.e., after the
prologue, before the epilogue).

This will enable livepatch (with the ORC unwinder) to reliably unwind
through BPF JIT frames.

Acked-by: Song Liu <song@kernel.org>
Acked-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Link: https://lore.kernel.org/r/fd2bc5b4e261a680774b28f6100509fd5ebad2f0.1764818927.git.jpoimboe@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
2025-12-09 23:29:42 -08:00
Ondrej Mosnacek
189e5deb94 bpf, arm64: Do not audit capability check in do_jit()
Analogically to the x86 commit 881a9c9cb785 ("bpf: Do not audit
capability check in do_jit()"), change the capable() call to
ns_capable_noaudit() in order to avoid spurious SELinux denials in audit
log.

The commit log from that commit applies here as well:
"""
The failure of this check only results in a security mitigation being
applied, slightly affecting performance of the compiled BPF program. It
doesn't result in a failed syscall, an thus auditing a failed LSM
permission check for it is unwanted. For example with SELinux, it causes
a denial to be reported for confined processes running as root, which
tends to be flagged as a problem to be fixed in the policy. Yet
dontauditing or allowing CAP_SYS_ADMIN to the domain may not be
desirable, as it would allow/silence also other checks - either going
against the principle of least privilege or making debugging potentially
harder.

Fix it by changing it from capable() to ns_capable_noaudit(), which
instructs the LSMs to not audit the resulting denials.
"""

Fixes: f300769ead03 ("arm64: bpf: Only mitigate cBPF programs loaded by unprivileged users")
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Link: https://lore.kernel.org/r/20251204125916.441021-1-omosnace@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-09 23:25:11 -08:00
Mikhail Gavrilov
d70f79fef6 libbpf: Fix -Wdiscarded-qualifiers under C23
glibc ≥ 2.42 (GCC 15) defaults to -std=gnu23, which promotes
-Wdiscarded-qualifiers to an error.

In C23, strstr() and strchr() return "const char *".

Change variable types to const char * where the pointers are never
modified (res, sym_sfx, next_path).

Suggested-by: Florian Weimer <fweimer@redhat.com>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Link: https://lore.kernel.org/r/20251206092825.1471385-1-mikhail.v.gavrilov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-09 23:23:21 -08:00
Quentin Monnet
639f58a0f4 bpftool: Fix build warnings due to MS extensions
The kernel is now built with -fms-extensions. Anonymous structs or
unions permitted by these extensions have been used in several places,
and can end up in the generated vmlinux.h file, for example:

    struct ns_tree {
        [...]
    };

    [...]

    struct ns_common {
            [...]
            union {
                    struct ns_tree;
                    struct callback_head ns_rcu;
            };
    };

Trying to include this header for compiling a tool may result in build
warnings, if the compiler does not expect these extensions. This is the
case, for example, with bpftool:

    In file included from skeleton/pid_iter.bpf.c:3:
    .../tools/testing/selftests/bpf/tools/build/bpftool/vmlinux.h:64057:3:
    warning: declaration does not declare anything
    [-Wmissing-declarations]
     64057 |                 struct ns_tree;
           |                 ^~~~~~~~~~~~~~

Fix these build warnings in bpftool by turning on Microsoft extensions
when compiling the two BPF programs that rely on vmlinux.h.

Reported-by: Alexei Starovoitov <ast@kernel.org>
Closes: https://lore.kernel.org/bpf/CAADnVQK9ZkPC7+R5VXKHVdtj8tumpMXm7BTp0u9CoiFLz_aPTg@mail.gmail.com/
Signed-off-by: Quentin Monnet <qmo@kernel.org>
Link: https://lore.kernel.org/r/20251208130748.68371-1-qmo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-09 23:21:33 -08:00
Geert Uytterhoeven
861111b698 net: smc: SMC_HS_CTRL_BPF should depend on BPF_JIT
If CONFIG_BPF_SYSCALL=y, but CONFIG_BPF_JIT=n:

    net/smc/smc_hs_bpf.c: In function ‘bpf_smc_hs_ctrl_init’:
    include/linux/bpf.h:2068:50: error: statement with no effect [-Werror=unused-value]
     2068 | #define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; })
	  |                                                  ^~~~~~~~~~~~~~~~
    net/smc/smc_hs_bpf.c:139:16: note: in expansion of macro ‘register_bpf_struct_ops’
      139 |         return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl);
	  |                ^~~~~~~~~~~~~~~~~~~~~~~

While this compile error is caused by a bug in <linux/bpf.h>, none of
the code in net/smc/smc_hs_bpf.c becomes effective if CONFIG_BPF_JIT is
not enabled.  Hence add a dependency on BPF_JIT.

While at it, add the missing newline at the end of the file.

Fixes: 15f295f55656658e ("net/smc: bpf: Introduce generic hook for handshake flow")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/988c61e5fea280872d81b3640f1f34d0619cfbbf.1764843951.git.geert@linux-m68k.org
2025-12-04 11:07:18 -08:00
Alexei Starovoitov
835a507535 selftests/bpf: Add -fms-extensions to bpf build flags
The kernel is now built with -fms-extensions, therefore
generated vmlinux.h contains types like:
struct slab {
   ..
   struct freelist_counters;
};

Use -fms-extensions and -Wno-microsoft-anon-tag flags
to build bpf programs that #include "vmlinux.h"

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-12-03 20:20:11 -08:00
21 changed files with 388 additions and 106 deletions

View File

@ -748,6 +748,7 @@ ForEachMacros:
- 'ynl_attr_for_each_nested'
- 'ynl_attr_for_each_payload'
- 'zorro_for_each_dev'
- 'zpci_bus_for_each'
IncludeBlocks: Preserve
IncludeCategories:

View File

@ -1004,7 +1004,7 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
return;
if (capable(CAP_SYS_ADMIN))
if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
return;
if (supports_clearbhb(SCOPE_SYSTEM)) {

View File

@ -15,6 +15,7 @@ struct ipl_pl_hdr {
#define IPL_PL_FLAG_IPLPS 0x80
#define IPL_PL_FLAG_SIPL 0x40
#define IPL_PL_FLAG_IPLSR 0x20
#define IPL_PL_FLAG_SBP 0x10
/* IPL Parameter Block header */
struct ipl_pb_hdr {

View File

@ -262,6 +262,24 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store)
#define DEFINE_IPL_ATTR_BOOTPROG_RW(_prefix, _name, _fmt_out, _fmt_in, _hdr, _value) \
IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \
static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
struct kobj_attribute *attr, \
const char *buf, size_t len) \
{ \
unsigned long long value; \
if (sscanf(buf, _fmt_in, &value) != 1) \
return -EINVAL; \
(_value) = value; \
(_hdr).flags &= ~IPL_PL_FLAG_SBP; \
return len; \
} \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
__ATTR(_name, 0644, \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store)
#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value) \
static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
@ -818,12 +836,13 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
reipl_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
reipl_block_fcp->fcp.lun);
DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
reipl_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
reipl_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
reipl_block_fcp->fcp.devno);
DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
reipl_block_fcp->hdr,
reipl_block_fcp->fcp.bootprog);
static void reipl_get_ascii_loadparm(char *loadparm,
struct ipl_parameter_block *ibp)
@ -942,10 +961,11 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
reipl_block_nvme->nvme.fid);
DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
reipl_block_nvme->nvme.nsid);
DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
reipl_block_nvme->nvme.bootprog);
DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
reipl_block_nvme->nvme.br_lba);
DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
reipl_block_nvme->hdr,
reipl_block_nvme->nvme.bootprog);
static struct attribute *reipl_nvme_attrs[] = {
&sys_reipl_nvme_fid_attr.attr,
@ -1038,8 +1058,9 @@ static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
};
DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd);
DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
reipl_block_eckd->eckd.bootprog);
DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
reipl_block_eckd->hdr,
reipl_block_eckd->eckd.bootprog);
static struct attribute *reipl_eckd_attrs[] = {
&sys_reipl_eckd_device_attr.attr,
@ -1567,12 +1588,13 @@ DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
dump_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
dump_block_fcp->fcp.lun);
DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
dump_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
dump_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
dump_block_fcp->fcp.devno);
DEFINE_IPL_ATTR_BOOTPROG_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
dump_block_fcp->hdr,
dump_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr,
dump_block_fcp->fcp,
@ -1604,10 +1626,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
dump_block_nvme->nvme.fid);
DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
dump_block_nvme->nvme.nsid);
DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
dump_block_nvme->nvme.bootprog);
DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
dump_block_nvme->nvme.br_lba);
DEFINE_IPL_ATTR_BOOTPROG_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
dump_block_nvme->hdr,
dump_block_nvme->nvme.bootprog);
DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr,
dump_block_nvme->nvme,
@ -1635,8 +1658,9 @@ static const struct attribute_group dump_nvme_attr_group = {
/* ECKD dump device attributes */
DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd);
DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
dump_block_eckd->eckd.bootprog);
DEFINE_IPL_ATTR_BOOTPROG_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
dump_block_eckd->hdr,
dump_block_eckd->eckd.bootprog);
IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd);
IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);

View File

@ -104,7 +104,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
struct stack_frame_vdso_wrapper __user *sf_vdso;
struct stack_frame_user __user *sf;
unsigned long ip, sp;
bool first = true;
if (!current->mm)
return;
@ -133,24 +132,11 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
if (__get_user(ip, &sf->gprs[8]))
break;
}
/* Sanity check: ABI requires SP to be 8 byte aligned. */
if (sp & 0x7)
/* Validate SP and RA (ABI requires SP to be 8 byte aligned). */
if (sp & 0x7 || ip_invalid(ip))
break;
if (ip_invalid(ip)) {
/*
* If the instruction address is invalid, and this
* is the first stack frame, assume r14 has not
* been written to the stack yet. Otherwise exit.
*/
if (!first)
break;
ip = regs->gprs[14];
if (ip_invalid(ip))
break;
}
if (!store_ip(consume_entry, cookie, entry, perf, ip))
break;
first = false;
}
pagefault_enable();
}

View File

@ -961,6 +961,7 @@ void zpci_device_reserved(struct zpci_dev *zdev)
}
void zpci_release_device(struct kref *kref)
__releases(&zpci_list_lock)
{
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
@ -1148,6 +1149,7 @@ static void zpci_add_devices(struct list_head *scan_list)
int zpci_scan_devices(void)
{
struct zpci_bus *zbus;
LIST_HEAD(scan_list);
int rc;
@ -1156,7 +1158,10 @@ int zpci_scan_devices(void)
return rc;
zpci_add_devices(&scan_list);
zpci_bus_scan_busses();
zpci_bus_for_each(zbus) {
zpci_bus_scan_bus(zbus);
cond_resched();
}
return 0;
}

View File

@ -153,23 +153,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
return ret;
}
/* zpci_bus_scan_busses - Scan all registered busses
*
* Scan all available zbusses
*
*/
void zpci_bus_scan_busses(void)
{
struct zpci_bus *zbus = NULL;
mutex_lock(&zbus_list_lock);
list_for_each_entry(zbus, &zbus_list, bus_next) {
zpci_bus_scan_bus(zbus);
cond_resched();
}
mutex_unlock(&zbus_list_lock);
}
static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
{
return !s390_pci_no_rid && zdev->rid_available &&
@ -222,10 +205,29 @@ out_free_domain:
return -ENOMEM;
}
static void zpci_bus_release(struct kref *kref)
/**
* zpci_bus_release - Un-initialize resources associated with the zbus and
* free memory
* @kref: refcount * that is part of struct zpci_bus
*
* MUST be called with `zbus_list_lock` held, but the lock is released during
* run of the function.
*/
static inline void zpci_bus_release(struct kref *kref)
__releases(&zbus_list_lock)
{
struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
lockdep_assert_held(&zbus_list_lock);
list_del(&zbus->bus_next);
mutex_unlock(&zbus_list_lock);
/*
* At this point no-one should see this object, or be able to get a new
* reference to it.
*/
if (zbus->bus) {
pci_lock_rescan_remove();
pci_stop_root_bus(zbus->bus);
@ -237,16 +239,19 @@ static void zpci_bus_release(struct kref *kref)
pci_unlock_rescan_remove();
}
mutex_lock(&zbus_list_lock);
list_del(&zbus->bus_next);
mutex_unlock(&zbus_list_lock);
zpci_remove_parent_msi_domain(zbus);
kfree(zbus);
}
static void zpci_bus_put(struct zpci_bus *zbus)
static inline void __zpci_bus_get(struct zpci_bus *zbus)
{
kref_put(&zbus->kref, zpci_bus_release);
lockdep_assert_held(&zbus_list_lock);
kref_get(&zbus->kref);
}
static inline void zpci_bus_put(struct zpci_bus *zbus)
{
kref_put_mutex(&zbus->kref, zpci_bus_release, &zbus_list_lock);
}
static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
@ -258,7 +263,7 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
if (!zbus->multifunction)
continue;
if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
kref_get(&zbus->kref);
__zpci_bus_get(zbus);
goto out_unlock;
}
}
@ -268,6 +273,44 @@ out_unlock:
return zbus;
}
/**
* zpci_bus_get_next - get the next zbus object from given position in the list
* @pos: current position/cursor in the global zbus list
*
* Acquires and releases references as the cursor iterates (might also free/
* release the cursor). Is tolerant of concurrent operations on the list.
*
* To begin the iteration, set *@pos to %NULL before calling the function.
*
* *@pos is set to %NULL in cases where either the list is empty, or *@pos is
* the last element in the list.
*
* Context: Process context. May sleep.
*/
void zpci_bus_get_next(struct zpci_bus **pos)
{
struct zpci_bus *curp = *pos, *next = NULL;
mutex_lock(&zbus_list_lock);
if (curp)
next = list_next_entry(curp, bus_next);
else
next = list_first_entry(&zbus_list, typeof(*curp), bus_next);
if (list_entry_is_head(next, &zbus_list, bus_next))
next = NULL;
if (next)
__zpci_bus_get(next);
*pos = next;
mutex_unlock(&zbus_list_lock);
/* zpci_bus_put() might drop refcount to 0 and locks zbus_list_lock */
if (curp)
zpci_bus_put(curp);
}
static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
{
struct zpci_bus *zbus;
@ -279,9 +322,6 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
zbus->topo = topo;
zbus->topo_is_tid = topo_is_tid;
INIT_LIST_HEAD(&zbus->bus_next);
mutex_lock(&zbus_list_lock);
list_add_tail(&zbus->bus_next, &zbus_list);
mutex_unlock(&zbus_list_lock);
kref_init(&zbus->kref);
INIT_LIST_HEAD(&zbus->resources);
@ -291,6 +331,10 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
zbus->bus_resource.flags = IORESOURCE_BUS;
pci_add_resource(&zbus->resources, &zbus->bus_resource);
mutex_lock(&zbus_list_lock);
list_add_tail(&zbus->bus_next, &zbus_list);
mutex_unlock(&zbus_list_lock);
return zbus;
}

View File

@ -15,7 +15,20 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
void zpci_bus_device_unregister(struct zpci_dev *zdev);
int zpci_bus_scan_bus(struct zpci_bus *zbus);
void zpci_bus_scan_busses(void);
void zpci_bus_get_next(struct zpci_bus **pos);
/**
* zpci_bus_for_each - iterate over all the registered zbus objects
* @pos: a struct zpci_bus * as cursor
*
* Acquires and releases references as the cursor iterates over the registered
* objects. Is tolerant against concurrent removals of objects.
*
* Context: Process context. May sleep.
*/
#define zpci_bus_for_each(pos) \
for ((pos) = NULL, zpci_bus_get_next(&(pos)); (pos) != NULL; \
zpci_bus_get_next(&(pos)))
int zpci_bus_scan_device(struct zpci_dev *zdev);
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);

View File

@ -2,6 +2,7 @@
#include <linux/objtool.h>
#include <linux/module.h>
#include <linux/sort.h>
#include <linux/bpf.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
}
#endif
/* Fake frame pointer entry -- used as a fallback for generated code */
static struct orc_entry orc_fp_entry = {
.type = ORC_TYPE_CALL,
.sp_reg = ORC_REG_BP,
.sp_offset = 16,
.bp_reg = ORC_REG_PREV_SP,
.bp_offset = -16,
};
static struct orc_entry *orc_bpf_find(unsigned long ip)
{
#ifdef CONFIG_BPF_JIT
if (bpf_has_frame_pointer(ip))
return &orc_fp_entry;
#endif
return NULL;
}
/*
* If we crash with IP==0, the last successfully executed instruction
* was probably an indirect function call with a NULL function pointer,
@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = {
.type = ORC_TYPE_CALL
};
/* Fake frame pointer entry -- used as a fallback for generated code */
static struct orc_entry orc_fp_entry = {
.type = ORC_TYPE_CALL,
.sp_reg = ORC_REG_BP,
.sp_offset = 16,
.bp_reg = ORC_REG_PREV_SP,
.bp_offset = -16,
};
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip)
if (orc)
return orc;
/* BPF lookup: */
orc = orc_bpf_find(ip);
if (orc)
return orc;
return orc_ftrace_find(ip);
}
@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (!orc) {
/*
* As a fallback, try to assume this code uses a frame pointer.
* This is useful for generated code, like BPF, which ORC
* doesn't know about. This is just a guess, so the rest of
* the unwind is no longer considered reliable.
* This is just a guess, so the rest of the unwind is no longer
* considered reliable.
*/
orc = &orc_fp_entry;
state->error = true;

View File

@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
emit_prologue(&prog, image, stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
bpf_prog->aux->ksym.fp_start = prog - temp;
/* Exception callback will clobber callee regs for its own use, and
* restore the original callee regs from main prog's stack frame.
*/
@ -2736,6 +2739,8 @@ emit_jmp:
pop_r12(&prog);
}
EMIT1(0xC9); /* leave */
bpf_prog->aux->ksym.fp_end = prog - temp;
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
if (im)
im->ksym.fp_start = prog - (u8 *)rw_image;
if (!is_imm8(stack_size)) {
/* sub rsp, stack_size */
EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
EMIT1(0xC9); /* leave */
if (im)
im->ksym.fp_end = prog - (u8 *)rw_image;
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */

View File

@ -1283,6 +1283,8 @@ struct bpf_ksym {
struct list_head lnode;
struct latch_tree_node tnode;
bool prog;
u32 fp_start;
u32 fp_end;
};
enum bpf_tramp_prog_type {
@ -1511,6 +1513,7 @@ void bpf_image_ksym_add(struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
bool bpf_has_frame_pointer(unsigned long ip);
int bpf_jit_charge_modmem(u32 size);
void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);

View File

@ -760,6 +760,22 @@ struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
NULL;
}
bool bpf_has_frame_pointer(unsigned long ip)
{
struct bpf_ksym *ksym;
unsigned long offset;
guard(rcu)();
ksym = bpf_ksym_find(ip);
if (!ksym || !ksym->fp_start || !ksym->fp_end)
return false;
offset = ip - ksym->start;
return offset >= ksym->fp_start && offset < ksym->fp_end;
}
const struct exception_table_entry *search_bpf_extables(unsigned long addr)
{
const struct exception_table_entry *e = NULL;

View File

@ -6,10 +6,33 @@
#include <linux/kernel.h>
#include <linux/seq_file.h>
struct dmabuf_iter_priv {
/*
* If this pointer is non-NULL, the buffer's refcount is elevated to
* prevent destruction between stop/start. If reading is not resumed and
* start is never called again, then dmabuf_iter_seq_fini drops the
* reference when the iterator is released.
*/
struct dma_buf *dmabuf;
};
static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos)
{
if (*pos)
return NULL;
struct dmabuf_iter_priv *p = seq->private;
if (*pos) {
struct dma_buf *dmabuf = p->dmabuf;
if (!dmabuf)
return NULL;
/*
* Always resume from where we stopped, regardless of the value
* of pos.
*/
p->dmabuf = NULL;
return dmabuf;
}
return dma_buf_iter_begin();
}
@ -54,8 +77,11 @@ static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v)
{
struct dma_buf *dmabuf = v;
if (dmabuf)
dma_buf_put(dmabuf);
if (dmabuf) {
struct dmabuf_iter_priv *p = seq->private;
p->dmabuf = dmabuf;
}
}
static const struct seq_operations dmabuf_iter_seq_ops = {
@ -71,11 +97,27 @@ static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
seq_puts(seq, "dmabuf iter\n");
}
static int dmabuf_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux)
{
struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv;
p->dmabuf = NULL;
return 0;
}
static void dmabuf_iter_seq_fini(void *priv)
{
struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv;
if (p->dmabuf)
dma_buf_put(p->dmabuf);
}
static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
.seq_ops = &dmabuf_iter_seq_ops,
.init_seq_private = NULL,
.fini_seq_private = NULL,
.seq_priv_size = 0,
.init_seq_private = dmabuf_iter_seq_init,
.fini_seq_private = dmabuf_iter_seq_fini,
.seq_priv_size = sizeof(struct dmabuf_iter_priv),
};
static struct bpf_iter_reg bpf_dmabuf_reg_info = {

View File

@ -965,7 +965,7 @@ static const struct bpf_func_proto bpf_d_path_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_d_path_btf_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.allowed = bpf_d_path_allowed,
};

View File

@ -22,10 +22,10 @@ config SMC_DIAG
config SMC_HS_CTRL_BPF
bool "Generic eBPF hook for SMC handshake flow"
depends on SMC && BPF_SYSCALL
depends on SMC && BPF_JIT && BPF_SYSCALL
default y
help
SMC_HS_CTRL_BPF enables support to register generic eBPF hook for SMC
handshake flow, which offer much greater flexibility in modifying the behavior
of the SMC protocol stack compared to a complete kernel-based approach. Select
this option if you want filtring the handshake process via eBPF programs.
this option if you want filtring the handshake process via eBPF programs.

View File

@ -224,6 +224,8 @@ endif
$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP)
$(QUIET_CLANG)$(CLANG) \
-Wno-microsoft-anon-tag \
-fms-extensions \
-I$(or $(OUTPUT),.) \
-I$(srctree)/tools/include/uapi/ \
-I$(LIBBPF_BOOTSTRAP_INCLUDE) \

View File

@ -8484,7 +8484,7 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
struct bpf_object *obj = ctx;
const struct btf_type *t;
struct extern_desc *ext;
char *res;
const char *res;
res = strstr(sym_name, ".llvm.");
if (sym_type == 'd' && res)
@ -11818,7 +11818,8 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
*
* [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG")
*/
char sym_trim[256], *psym_trim = sym_trim, *sym_sfx;
char sym_trim[256], *psym_trim = sym_trim;
const char *sym_sfx;
if (!(sym_sfx = strstr(sym_name, ".llvm.")))
return 0;
@ -12401,7 +12402,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
if (!search_paths[i])
continue;
for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
char *next_path;
const char *next_path;
int seg_len;
if (s[0] == ':')

View File

@ -437,6 +437,8 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(abspath $(OUTPUT)/../usr/include) \
-std=gnu11 \
-fno-strict-aliasing \
-Wno-microsoft-anon-tag \
-fms-extensions \
-Wno-compare-distinct-pointer-types \
-Wno-initializer-overrides \
#

View File

@ -38,6 +38,14 @@ static int set_pathname(int fd, pid_t pid)
return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
}
static inline long syscall_close(int fd)
{
return syscall(__NR_close_range,
(unsigned int)fd,
(unsigned int)fd,
0u);
}
static int trigger_fstat_events(pid_t pid)
{
int sockfd = -1, procfd = -1, devfd = -1;
@ -104,36 +112,47 @@ out_close:
/* sys_close no longer triggers filp_close, but we can
* call sys_close_range instead which still does
*/
#define close(fd) syscall(__NR_close_range, fd, fd, 0)
close(pipefd[0]);
close(pipefd[1]);
close(sockfd);
close(procfd);
close(devfd);
close(localfd);
close(indicatorfd);
#undef close
syscall_close(pipefd[0]);
syscall_close(pipefd[1]);
syscall_close(sockfd);
syscall_close(procfd);
syscall_close(devfd);
syscall_close(localfd);
syscall_close(indicatorfd);
return ret;
}
static void attach_and_load(struct test_d_path **skel)
{
int err;
*skel = test_d_path__open_and_load();
if (CHECK(!*skel, "setup", "d_path skeleton failed\n"))
goto cleanup;
err = test_d_path__attach(*skel);
if (CHECK(err, "setup", "attach failed: %d\n", err))
goto cleanup;
(*skel)->bss->my_pid = getpid();
return;
cleanup:
test_d_path__destroy(*skel);
*skel = NULL;
}
static void test_d_path_basic(void)
{
struct test_d_path__bss *bss;
struct test_d_path *skel;
int err;
skel = test_d_path__open_and_load();
if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
goto cleanup;
err = test_d_path__attach(skel);
if (CHECK(err, "setup", "attach failed: %d\n", err))
attach_and_load(&skel);
if (!skel)
goto cleanup;
bss = skel->bss;
bss->my_pid = getpid();
err = trigger_fstat_events(bss->my_pid);
if (err < 0)
@ -195,6 +214,39 @@ static void test_d_path_check_types(void)
test_d_path_check_types__destroy(skel);
}
/* Check if the verifier correctly generates code for
* accessing the memory modified by d_path helper.
*/
static void test_d_path_mem_access(void)
{
int localfd = -1;
char path_template[] = "/dev/shm/d_path_loadgen.XXXXXX";
struct test_d_path__bss *bss;
struct test_d_path *skel;
attach_and_load(&skel);
if (!skel)
goto cleanup;
bss = skel->bss;
localfd = mkstemp(path_template);
if (CHECK(localfd < 0, "trigger", "mkstemp failed\n"))
goto cleanup;
if (CHECK(fallocate(localfd, 0, 0, 1024) < 0, "trigger", "fallocate failed\n"))
goto cleanup;
remove(path_template);
if (CHECK(!bss->path_match_fallocate, "check",
"failed to read fallocate path"))
goto cleanup;
cleanup:
syscall_close(localfd);
test_d_path__destroy(skel);
}
void test_d_path(void)
{
if (test__start_subtest("basic"))
@ -205,4 +257,7 @@ void test_d_path(void)
if (test__start_subtest("check_alloc_mem"))
test_d_path_check_types();
if (test__start_subtest("check_mem_access"))
test_d_path_mem_access();
}

View File

@ -73,12 +73,10 @@ close_memfd:
return -1;
}
static int create_sys_heap_dmabuf(void)
static int create_sys_heap_dmabuf(size_t bytes)
{
sysheap_test_buffer_size = 20 * getpagesize();
struct dma_heap_allocation_data data = {
.len = sysheap_test_buffer_size,
.len = bytes,
.fd = 0,
.fd_flags = O_RDWR | O_CLOEXEC,
.heap_flags = 0,
@ -110,7 +108,9 @@ close_sysheap_dmabuf:
static int create_test_buffers(void)
{
udmabuf = create_udmabuf();
sysheap_dmabuf = create_sys_heap_dmabuf();
sysheap_test_buffer_size = 20 * getpagesize();
sysheap_dmabuf = create_sys_heap_dmabuf(sysheap_test_buffer_size);
if (udmabuf < 0 || sysheap_dmabuf < 0)
return -1;
@ -219,6 +219,26 @@ close_iter_fd:
close(iter_fd);
}
static void subtest_dmabuf_iter_check_lots_of_buffers(struct dmabuf_iter *skel)
{
int iter_fd;
char buf[1024];
size_t total_bytes_read = 0;
ssize_t bytes_read;
iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
if (!ASSERT_OK_FD(iter_fd, "iter_create"))
return;
while ((bytes_read = read(iter_fd, buf, sizeof(buf))) > 0)
total_bytes_read += bytes_read;
ASSERT_GT(total_bytes_read, getpagesize(), "total_bytes_read");
close(iter_fd);
}
static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
{
LIBBPF_OPTS(bpf_test_run_opts, topts);
@ -275,6 +295,23 @@ void test_dmabuf_iter(void)
subtest_dmabuf_iter_check_no_infinite_reads(skel);
if (test__start_subtest("default_iter"))
subtest_dmabuf_iter_check_default_iter(skel);
if (test__start_subtest("lots_of_buffers")) {
size_t NUM_BUFS = 100;
int buffers[NUM_BUFS];
int i;
for (i = 0; i < NUM_BUFS; ++i) {
buffers[i] = create_sys_heap_dmabuf(getpagesize());
if (!ASSERT_OK_FD(buffers[i], "dmabuf_fd"))
goto cleanup_bufs;
}
subtest_dmabuf_iter_check_lots_of_buffers(skel);
cleanup_bufs:
for (--i; i >= 0; --i)
close(buffers[i]);
}
if (test__start_subtest("open_coded"))
subtest_dmabuf_iter_check_open_coded(skel, map_fd);

View File

@ -17,6 +17,7 @@ int rets_close[MAX_FILES] = {};
int called_stat = 0;
int called_close = 0;
int path_match_fallocate = 0;
SEC("fentry/security_inode_getattr")
int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
@ -62,4 +63,26 @@ int BPF_PROG(prog_close, struct file *file, void *id)
return 0;
}
SEC("fentry/vfs_fallocate")
int BPF_PROG(prog_fallocate, struct file *file, int mode, loff_t offset, loff_t len)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
int ret = 0;
char path_fallocate[MAX_PATH_LEN] = {};
if (pid != my_pid)
return 0;
ret = bpf_d_path(&file->f_path,
path_fallocate, MAX_PATH_LEN);
if (ret < 0)
return 0;
if (!path_fallocate[0])
return 0;
path_match_fallocate = 1;
return 0;
}
char _license[] SEC("license") = "GPL";