1
0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2026-01-11 17:10:13 +00:00

Compare commits

...

106 Commits

Author SHA1 Message Date
Linus Torvalds
7475e51b87 Including fixes from BPF and netfilter.
Current release - regressions:
 
  - core: fix undefined behavior in netdev name allocation
 
  - bpf: do not allocate percpu memory at init stage
 
  - netfilter: nf_tables: split async and sync catchall in two functions
 
  - mptcp: fix possible NULL pointer dereference on close
 
 Current release - new code bugs:
 
  - eth: ice: dpll: fix initial lock status of dpll
 
 Previous releases - regressions:
 
  - bpf: fix precision backtracking instruction iteration
 
  - af_unix: fix use-after-free in unix_stream_read_actor()
 
  - tipc: fix kernel-infoleak due to uninitialized TLV value
 
  - eth: bonding: stop the device in bond_setup_by_slave()
 
  - eth: mlx5:
    - fix double free of encap_header
    - avoid referencing skb after free-ing in drop path
 
  - eth: hns3: fix VF reset
 
  - eth: mvneta: fix calls to page_pool_get_stats
 
 Previous releases - always broken:
 
  - core: set SOCK_RCU_FREE before inserting socket into hashtable
 
  - bpf: fix control-flow graph checking in privileged mode
 
  - eth: ppp: limit MRU to 64K
 
  - eth: stmmac: avoid rx queue overrun
 
  - eth: icssg-prueth: fix error cleanup on failing initialization
 
  - eth: hns3: fix out-of-bounds access may occur when coalesce info is
  	      read via debugfs
 
  - eth: cortina: handle large frames
 
 Misc:
 
  - selftests: gso: support CONFIG_MAX_SKB_FRAGS up to 45
 
 Signed-off-by: Paolo Abeni <pabeni@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCAAwFiEEg1AjqC77wbdLX2LbKSR5jcyPE6QFAmVV9akSHHBhYmVuaUBy
 ZWRoYXQuY29tAAoJECkkeY3MjxOkICMP/1+QHUaD4JG1mW9oYc2zINPfQl3dqQt3
 2CGSE2yrtbQvyQl39BDa0WFzV5X6So6/U50twhTNM+UAJsCaOvxCUDvUP9eY9Dcm
 z2H4oITZimyP4CEb3l7JpL2PImvfImL7D/fCPPMUZVzNY6dkEFznaQrnawbJz4gg
 mZXDnjwIXq7OchoJy3dHzyOn4ZQj2Df5VcfBzkVMdMcwV55Sd5JezbhwJ6NOmnKA
 uoXlq4pFYj3ahAhEQfLWUwXmF3e6esHs/WUCMe5FR9YkanJlu4oHUmY3RLzfcdQA
 PPIPDRxOzthcXyymqvqs7gnZ3ruMUll4B7tGTVFpJch8ts+DwGdUyBIIoDd/1BUT
 gmjipP5HPia3Qdtk3Jc4vMkcf5AwoGo0hXku7YYJ1K7+4+t8ep3/hDbQc0PLWX6J
 afiQgqpnNXHSTqBO5zl91vSwhGr/AAtAkDlPnsQL/RDAxY4teIwxHuoMvwPWaHZJ
 sMo5ZcHXvNnBbGhpozFtmrnbf1nduUrQmW5LkJViCLf25Sj6pDYbo8WnhMuOKSnZ
 7an2YqniCgBtrX4MEVn2jsWgavI+SxndVIQR04u0uwqmP+dn8s9LUfjKKDtPWHsK
 +zMFtk+Op03TW5ur9w3+dgrGH0cLogPO3BJkho7xXKBfZ6/tN/pOef3/nV9xY6g8
 JjnBUdpZRTWI
 =VjWw
 -----END PGP SIGNATURE-----

Merge tag 'net-6.7-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Paolo Abeni:
 "Including fixes from BPF and netfilter.

  Current release - regressions:

   - core: fix undefined behavior in netdev name allocation

   - bpf: do not allocate percpu memory at init stage

   - netfilter: nf_tables: split async and sync catchall in two
     functions

   - mptcp: fix possible NULL pointer dereference on close

  Current release - new code bugs:

   - eth: ice: dpll: fix initial lock status of dpll

  Previous releases - regressions:

   - bpf: fix precision backtracking instruction iteration

   - af_unix: fix use-after-free in unix_stream_read_actor()

   - tipc: fix kernel-infoleak due to uninitialized TLV value

   - eth: bonding: stop the device in bond_setup_by_slave()

   - eth: mlx5:
      - fix double free of encap_header
      - avoid referencing skb after free-ing in drop path

   - eth: hns3: fix VF reset

   - eth: mvneta: fix calls to page_pool_get_stats

  Previous releases - always broken:

   - core: set SOCK_RCU_FREE before inserting socket into hashtable

   - bpf: fix control-flow graph checking in privileged mode

   - eth: ppp: limit MRU to 64K

   - eth: stmmac: avoid rx queue overrun

   - eth: icssg-prueth: fix error cleanup on failing initialization

   - eth: hns3: fix out-of-bounds access may occur when coalesce info is
     read via debugfs

   - eth: cortina: handle large frames

  Misc:

   - selftests: gso: support CONFIG_MAX_SKB_FRAGS up to 45"

* tag 'net-6.7-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (78 commits)
  macvlan: Don't propagate promisc change to lower dev in passthru
  net: sched: do not offload flows with a helper in act_ct
  net/mlx5e: Check return value of snprintf writing to fw_version buffer for representors
  net/mlx5e: Check return value of snprintf writing to fw_version buffer
  net/mlx5e: Reduce the size of icosq_str
  net/mlx5: Increase size of irq name buffer
  net/mlx5e: Update doorbell for port timestamping CQ before the software counter
  net/mlx5e: Track xmit submission to PTP WQ after populating metadata map
  net/mlx5e: Avoid referencing skb after free-ing in drop path of mlx5e_sq_xmit_wqe
  net/mlx5e: Don't modify the peer sent-to-vport rules for IPSec offload
  net/mlx5e: Fix pedit endianness
  net/mlx5e: fix double free of encap_header in update funcs
  net/mlx5e: fix double free of encap_header
  net/mlx5: Decouple PHC .adjtime and .adjphase implementations
  net/mlx5: DR, Allow old devices to use multi destination FTE
  net/mlx5: Free used cpus mask when an IRQ is released
  Revert "net/mlx5: DR, Supporting inline WQE when possible"
  bpf: Do not allocate percpu memory at init stage
  net: Fix undefined behavior in netdev name allocation
  dt-bindings: net: ethernet-controller: Fix formatting error
  ...
2023-11-16 07:51:26 -05:00
Linus Torvalds
6eb1acd976 xen: branch for v6.7-rc2
-----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQRTLbB6QfY48x44uB6AXGG7T9hjvgUCZVX+TwAKCRCAXGG7T9hj
 vmy/AP9E0XUIT05gFVcoyHXsh5VfCd7pu4bSDtf5TRZIpzgV2wD/czHkik77rJ55
 kPtFuZ4gEngFVSM8z3C+bKszEWu7rgM=
 =Zy37
 -----END PGP SIGNATURE-----

Merge tag 'for-linus-6.7a-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from Juergen Gross:

 - A fix in the Xen events driver avoiding the use of RCU after
   the call to rcu_report_dead() when taking a cpu down

 - A fix for running as Xen dom0 to line up ACPI's idea of power
   management capabilities with the one of Xen

 - A cleanup eliminating several kernel-doc warnings in Xen related
   code

 - A cleanup series of the Xen events driver

* tag 'for-linus-6.7a-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/events: remove some info_for_irq() calls in pirq handling
  xen/events: modify internal [un]bind interfaces
  xen/events: drop xen_allocate_irqs_dynamic()
  xen/events: remove some simple helpers from events_base.c
  xen/events: reduce externally visible helper functions
  xen/events: remove unused functions
  xen/events: fix delayed eoi list handling
  xen/shbuf: eliminate 17 kernel-doc warnings
  acpi/processor: sanitize _OSC/_PDC capabilities for Xen dom0
  xen/events: avoid using info_for_irq() in xen_send_IPI_one()
2023-11-16 07:44:34 -05:00
Linus Torvalds
372bed5fbb vhost,virtio,vdpa,firmware: bugfixes
bugfixes all over the place
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmVT1w0PHG1zdEByZWRo
 YXQuY29tAAoJECgfDbjSjVRpYXMH/AxTzofnpIJOvmY0QYLbBg3+wu0ml4X1pvJc
 Ia2pm5NZLTPucesw+AFM1q8YIiRX5mBRTmxtGLFzqiC+miAKPvMJK2hQe1jlNqgM
 7zBb0jeHvURvo4rvDi8a2Xvf6T2ypGapBhmcNy7VtEugLrXGHGMMzjE3IS1/Zsbm
 AK70P0ujTpLZtp4TFy2qqZoaikijbAdHhu2GTo1KfFh3RcwSc1uhbrtoTHUQ42bw
 0nXz8eLyuu9VmUryVgLy+j9pnS9KDMNhVCpjpVIHD4iI9L6sQVj8fwc/Bnxq8mh7
 yDcagvcIK54M786cAmLWC+zQu7feMQ8FcC2zduJT+Yl2X4fX04E=
 =xFJf
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio fixes from Michael Tsirkin:
 "Bugfixes all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vhost-vdpa: fix use after free in vhost_vdpa_probe()
  virtio_pci: Switch away from deprecated irq_set_affinity_hint
  riscv, qemu_fw_cfg: Add support for RISC-V architecture
  vdpa_sim_blk: allocate the buffer zeroed
  virtio_pci: move structure to a header
2023-11-16 07:39:37 -05:00
Paolo Abeni
cff088d924 netfilter pull request 23-11-15
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEN9lkrMBJgcdVAPub1V2XiooUIOQFAmVVD/4ACgkQ1V2XiooU
 IOQegA/9FUHHllNsrk0hUWKeH+1P54jExmKlIhSfwUJ3RptehF0ARb3uQb0rVPuM
 DwpaMVZ1lhL3TzcFy1HeCNFU6mKxqKhlvzDYvbRcfbNE96eL7l5RfJwWh0UViwWI
 wUtZhznHE6ygdZJ8TZUA+b5vuZkihO2ZNc7rpuBimCQGEXqZfoOpNFVSoNBsWvL5
 3s/qq7ai6rl34ipVlPpFO7Gk94EHHMwa4BhGfTNVRSrG2B9EcPAMAOKVDBCPrGST
 CdVkkn79ajNUhiKSkD3/JC4fMDavT6ZEIzUMFfwIdF1b7Q+5F64lbZaD5gD2bO30
 QAk7ttgq951XnQpLfXC4H4vHdf65GeeYLPSuLqyo/rnkr/+h+hCgnorBwZrzrG3D
 mbqQtUZOsJjrH6vcExuHwlM1LEOSGbGuI/zjSTEXEJG8M3qngxXCeMQun2fB8IIN
 yTB40SPUhvKlR6JdfJRou/WJj9eKZmyuaWP2MaQT7LFftttt24Ndluoo5KCx92IZ
 9LmCm8h93oWKRFAxk3NQQCZbbW0RW8Af+gbmnRI2TjtyDpNrnzPhR3fjopD9wqJH
 xMuW5xeTjqdjFR7jvAaYPnnFOOk0vA0JlZlBNmmNIqp04+BmD4RTVqmqwzNCjHvm
 KqqWGjKOEYOWN2OJawaqu6V5cU0/untb2OG2b5WAhDycF/CnxA0=
 =lwne
 -----END PGP SIGNATURE-----

Merge tag 'nf-23-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Remove unused variable causing compilation warning in nft_set_rbtree,
   from Yang Li. This unused variable is a left over from previous
   merge window.

2) Possible return of uninitialized in nf_conntrack_bridge, from
   Linkui Xiao. This is there since nf_conntrack_bridge is available.

3) Fix incorrect pointer math in nft_byteorder, from Dan Carpenter.
   Problem has been there since 2016.

4) Fix bogus error in destroy set element command. Problem is there
   since this new destroy command was added.

5) Fix race condition in ipset between swap and destroy commands and
   add/del/test control plane. This problem is there since ipset was
   merged.

6) Split async and sync catchall GC in two function to fix unsafe
   iteration over RCU. This is a fix-for-fix that was included in
   the previous pull request.

* tag 'nf-23-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: split async and sync catchall in two functions
  netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test
  netfilter: nf_tables: bogus ENOENT when destroying element which does not exist
  netfilter: nf_tables: fix pointer math issue in nft_byteorder_eval()
  netfilter: nf_conntrack_bridge: initialize err to 0
  netfilter: nft_set_rbtree: Remove unused variable nft_net
====================

Link: https://lore.kernel.org/r/20231115184514.8965-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2023-11-16 11:02:52 +01:00
Vlad Buslov
7e1caeace0 macvlan: Don't propagate promisc change to lower dev in passthru
Macvlan device in passthru mode sets its lower device promiscuous mode
according to its MACVLAN_FLAG_NOPROMISC flag instead of synchronizing it to
its own promiscuity setting. However, macvlan_change_rx_flags() function
doesn't check the mode before propagating such changes to the lower device
which can cause net_device->promiscuity counter overflow as illustrated by
reproduction example [0] and resulting dmesg log [1]. Fix the issue by
first verifying the mode in macvlan_change_rx_flags() function before
propagating promiscuous mode change to the lower device.

[0]:
ip link add macvlan1 link enp8s0f0 type macvlan mode passthru
ip link set macvlan1 promisc on
ip l set dev macvlan1 up
ip link set macvlan1 promisc off
ip l set dev macvlan1 down
ip l set dev macvlan1 up

[1]:
[ 5156.281724] macvlan1: entered promiscuous mode
[ 5156.285467] mlx5_core 0000:08:00.0 enp8s0f0: entered promiscuous mode
[ 5156.287639] macvlan1: left promiscuous mode
[ 5156.288339] mlx5_core 0000:08:00.0 enp8s0f0: left promiscuous mode
[ 5156.290907] mlx5_core 0000:08:00.0 enp8s0f0: entered promiscuous mode
[ 5156.317197] mlx5_core 0000:08:00.0 enp8s0f0: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.

Fixes: efdbd2b30caa ("macvlan: Propagate promiscuity setting to lower devices.")
Reviewed-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20231114175915.1649154-1-vladbu@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2023-11-16 10:52:59 +01:00
Xin Long
7cd5af0e93 net: sched: do not offload flows with a helper in act_ct
There is no hardware supporting ct helper offload. However, prior to this
patch, a flower filter with a helper in the ct action can be successfully
set into the HW, for example (eth1 is a bnxt NIC):

  # tc qdisc add dev eth1 ingress_block 22 ingress
  # tc filter add block 22 proto ip flower skip_sw ip_proto tcp \
    dst_port 21 ct_state -trk action ct helper ipv4-tcp-ftp
  # tc filter show dev eth1 ingress

    filter block 22 protocol ip pref 49152 flower chain 0 handle 0x1
      eth_type ipv4
      ip_proto tcp
      dst_port 21
      ct_state -trk
      skip_sw
      in_hw in_hw_count 1   <----
        action order 1: ct zone 0 helper ipv4-tcp-ftp pipe
         index 2 ref 1 bind 1
        used_hw_stats delayed

This might cause the flower filter not to work as expected in the HW.

This patch avoids this problem by simply returning -EOPNOTSUPP in
tcf_ct_offload_act_setup() to not allow to offload flows with a helper
in act_ct.

Fixes: a21b06e73191 ("net: sched: add helper support in act_ct")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/f8685ec7702c4a448a1371a8b34b43217b583b9d.1699898008.git.lucien.xin@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2023-11-16 10:10:51 +01:00
Jakub Kicinski
bdc454fcdc Merge branch 'mlx5-fixes-2023-11-13-manual'
Saeed Mahameed says:

====================
This series provides bug fixes to mlx5 driver.
====================

Link: https://lore.kernel.org/r/20231114215846.5902-1-saeed@kernel.org/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:32 -08:00
Rahul Rameshbabu
1b2bd0c026 net/mlx5e: Check return value of snprintf writing to fw_version buffer for representors
Treat the operation as an error case when the return value is equivalent to
the size of the name buffer. Failed to write null terminator to the name
buffer, making the string malformed and should not be used. Provide a
string with only the firmware version when forming the string with the
board id fails. This logic for representors is identical to normal flow
with ethtool.

Without check, will trigger -Wformat-truncation with W=1.

    drivers/net/ethernet/mellanox/mlx5/core/en_rep.c: In function 'mlx5e_rep_get_drvinfo':
    drivers/net/ethernet/mellanox/mlx5/core/en_rep.c:78:31: warning: '%.16s' directive output may be truncated writing up to 16 bytes into a region of size between 13 and 22 [-Wformat-truncation=]
      78 |                  "%d.%d.%04d (%.16s)",
         |                               ^~~~~
    drivers/net/ethernet/mellanox/mlx5/core/en_rep.c:77:9: note: 'snprintf' output between 12 and 37 bytes into a destination of size 32
      77 |         snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      78 |                  "%d.%d.%04d (%.16s)",
         |                  ~~~~~~~~~~~~~~~~~~~~~
      79 |                  fw_rev_maj(mdev), fw_rev_min(mdev),
         |                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      80 |                  fw_rev_sub(mdev), mdev->board_id);
         |                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: cf83c8fdcd47 ("net/mlx5e: Add missing ethtool driver info for representors")
Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-16-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Rahul Rameshbabu
41e63c2baa net/mlx5e: Check return value of snprintf writing to fw_version buffer
Treat the operation as an error case when the return value is equivalent to
the size of the name buffer. Failed to write null terminator to the name
buffer, making the string malformed and should not be used. Provide a
string with only the firmware version when forming the string with the
board id fails.

Without check, will trigger -Wformat-truncation with W=1.

    drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c: In function 'mlx5e_ethtool_get_drvinfo':
    drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c:49:31: warning: '%.16s' directive output may be truncated writing up to 16 bytes into a region of size between 13 and 22 [-Wformat-truncation=]
      49 |                  "%d.%d.%04d (%.16s)",
         |                               ^~~~~
    drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c:48:9: note: 'snprintf' output between 12 and 37 bytes into a destination of size 32
      48 |         snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      49 |                  "%d.%d.%04d (%.16s)",
         |                  ~~~~~~~~~~~~~~~~~~~~~
      50 |                  fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
         |                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      51 |                  mdev->board_id);
         |                  ~~~~~~~~~~~~~~~

Fixes: 84e11edb71de ("net/mlx5e: Show board id in ethtool driver information")
Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Saeed Mahameed
dce9414284 net/mlx5e: Reduce the size of icosq_str
icosq_str size is unnecessarily too long, and it causes a build warning
-Wformat-truncation with W=1. Looking closely, It doesn't need to be 255B,
hence this patch reduces the size to 32B which should be more than enough
to host the string: "ICOSQ: 0x%x, ".

While here, add a missing space in the formatted string.

This fixes the following build warning:

$ KCFLAGS='-Wall -Werror'
$ make O=/tmp/kbuild/linux W=1 -s -j12 drivers/net/ethernet/mellanox/mlx5/core/

drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c: In function 'mlx5e_reporter_rx_timeout':
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c:718:56:
error: ', CQ: 0x' directive output may be truncated writing 8 bytes into a region of size between 0 and 255 [-Werror=format-truncation=]
  718 |                  "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x",
      |                                                        ^~~~~~~~
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c:717:9: note: 'snprintf' output between 43 and 322 bytes into a destination of size 288
  717 |         snprintf(err_str, sizeof(err_str),
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  718 |                  "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x",
      |                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  719 |                  rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);
      |                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: 521f31af004a ("net/mlx5e: Allow RQ outside of channel context")
Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-14-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Rahul Rameshbabu
3338bebfc2 net/mlx5: Increase size of irq name buffer
Without increased buffer size, will trigger -Wformat-truncation with W=1
for the snprintf operation writing to the buffer.

    drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c: In function 'mlx5_irq_alloc':
    drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:296:7: error: '@pci:' directive output may be truncated writing 5 bytes into a region of size between 1 and 32 [-Werror=format-truncation=]
      296 |    "%s@pci:%s", name, pci_name(dev->pdev));
          |       ^~~~~
    drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:295:2: note: 'snprintf' output 6 or more bytes (assuming 37) into a destination of size 32
      295 |  snprintf(irq->name, MLX5_MAX_IRQ_NAME,
          |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      296 |    "%s@pci:%s", name, pci_name(dev->pdev));
          |    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: ada9f5d00797 ("IB/mlx5: Fix eq names to display nicely in /proc/interrupts")
Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-13-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Rahul Rameshbabu
92214be597 net/mlx5e: Update doorbell for port timestamping CQ before the software counter
Previously, mlx5e_ptp_poll_ts_cq would update the device doorbell with the
incremented consumer index after the relevant software counters in the
kernel were updated. In the mlx5e_sq_xmit_wqe context, this would lead to
either overrunning the device CQ or exceeding the expected software buffer
size in the device CQ if the device CQ size was greater than the software
buffer size. Update the relevant software counter only after updating the
device CQ consumer index in the port timestamping napi_poll context.

Log:
    mlx5_core 0000:08:00.0: cq_err_event_notifier:517:(pid 0): CQ error on CQN 0x487, syndrome 0x1
    mlx5_core 0000:08:00.0 eth2: mlx5e_cq_error_event: cqn=0x000487 event=0x04

Fixes: 1880bc4e4a96 ("net/mlx5e: Add TX port timestamp support")
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-12-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Rahul Rameshbabu
7e3f3ba97e net/mlx5e: Track xmit submission to PTP WQ after populating metadata map
Ensure the skb is available in metadata mapping to skbs before tracking the
metadata index for detecting undelivered CQEs. If the metadata index is put
in the tracking list before putting the skb in the map, the metadata index
might be used for detecting undelivered CQEs before the relevant skb is
available in the map, which can lead to a null-ptr-deref.

Log:
    general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] SMP KASAN
    KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
    CPU: 0 PID: 1243 Comm: kworker/0:2 Not tainted 6.6.0-rc4+ #108
    Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
    Workqueue: events mlx5e_rx_dim_work [mlx5_core]
    RIP: 0010:mlx5e_ptp_napi_poll+0x9a4/0x2290 [mlx5_core]
    Code: 8c 24 38 cc ff ff 4c 8d 3c c1 4c 89 f9 48 c1 e9 03 42 80 3c 31 00 0f 85 97 0f 00 00 4d 8b 3f 49 8d 7f 28 48 89 f9 48 c1 e9 03 <42> 80 3c 31 00 0f 85 8b 0f 00 00 49 8b 47 28 48 85 c0 0f 84 05 07
    RSP: 0018:ffff8884d3c09c88 EFLAGS: 00010206
    RAX: 0000000000000069 RBX: ffff8881160349d8 RCX: 0000000000000005
    RDX: ffffed10218f48cf RSI: 0000000000000004 RDI: 0000000000000028
    RBP: ffff888122707700 R08: 0000000000000001 R09: ffffed109a781383
    R10: 0000000000000003 R11: 0000000000000003 R12: ffff88810c7a7a40
    R13: ffff888122707700 R14: dffffc0000000000 R15: 0000000000000000
    FS:  0000000000000000(0000) GS:ffff8884d3c00000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 00007f4f878dd6e0 CR3: 000000014d108002 CR4: 0000000000370eb0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    Call Trace:
    <IRQ>
    ? die_addr+0x3c/0xa0
    ? exc_general_protection+0x144/0x210
    ? asm_exc_general_protection+0x22/0x30
    ? mlx5e_ptp_napi_poll+0x9a4/0x2290 [mlx5_core]
    ? mlx5e_ptp_napi_poll+0x8f6/0x2290 [mlx5_core]
    __napi_poll.constprop.0+0xa4/0x580
    net_rx_action+0x460/0xb80
    ? _raw_spin_unlock_irqrestore+0x32/0x60
    ? __napi_poll.constprop.0+0x580/0x580
    ? tasklet_action_common.isra.0+0x2ef/0x760
    __do_softirq+0x26c/0x827
    irq_exit_rcu+0xc2/0x100
    common_interrupt+0x7f/0xa0
    </IRQ>
    <TASK>
    asm_common_interrupt+0x22/0x40
    RIP: 0010:__kmem_cache_alloc_node+0xb/0x330
    Code: 41 5d 41 5e 41 5f c3 8b 44 24 14 8b 4c 24 10 09 c8 eb d5 e8 b7 43 ca 01 0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 57 <41> 56 41 89 d6 41 55 41 89 f5 41 54 49 89 fc 53 48 83 e4 f0 48 83
    RSP: 0018:ffff88812c4079c0 EFLAGS: 00000246
    RAX: 1ffffffff083c7fe RBX: ffff888100042dc0 RCX: 0000000000000218
    RDX: 00000000ffffffff RSI: 0000000000000dc0 RDI: ffff888100042dc0
    RBP: ffff88812c4079c8 R08: ffffffffa0289f96 R09: ffffed1025880ea9
    R10: ffff888138839f80 R11: 0000000000000002 R12: 0000000000000dc0
    R13: 0000000000000100 R14: 000000000000008c R15: ffff8881271fc450
    ? cmd_exec+0x796/0x2200 [mlx5_core]
    kmalloc_trace+0x26/0xc0
    cmd_exec+0x796/0x2200 [mlx5_core]
    mlx5_cmd_do+0x22/0xc0 [mlx5_core]
    mlx5_cmd_exec+0x17/0x30 [mlx5_core]
    mlx5_core_modify_cq_moderation+0x139/0x1b0 [mlx5_core]
    ? mlx5_add_cq_to_tasklet+0x280/0x280 [mlx5_core]
    ? lockdep_set_lock_cmp_fn+0x190/0x190
    ? process_one_work+0x659/0x1220
    mlx5e_rx_dim_work+0x9d/0x100 [mlx5_core]
    process_one_work+0x730/0x1220
    ? lockdep_hardirqs_on_prepare+0x400/0x400
    ? max_active_store+0xf0/0xf0
    ? assign_work+0x168/0x240
    worker_thread+0x70f/0x12d0
    ? __kthread_parkme+0xd1/0x1d0
    ? process_one_work+0x1220/0x1220
    kthread+0x2d9/0x3b0
    ? kthread_complete_and_exit+0x20/0x20
    ret_from_fork+0x2d/0x70
    ? kthread_complete_and_exit+0x20/0x20
    ret_from_fork_asm+0x11/0x20
    </TASK>
    Modules linked in: xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_ib ib_uverbs ib_core zram zsmalloc mlx5_core fuse
    ---[ end trace 0000000000000000 ]---

Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs")
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-11-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Rahul Rameshbabu
64f14d16ee net/mlx5e: Avoid referencing skb after free-ing in drop path of mlx5e_sq_xmit_wqe
When SQ is a port timestamping SQ for PTP, do not access tx flags of skb
after free-ing the skb. Free the skb only after all references that depend
on it have been handled in the dropped WQE path.

Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs")
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-10-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Jianbo Liu
bdf788cf22 net/mlx5e: Don't modify the peer sent-to-vport rules for IPSec offload
As IPSec packet offload in switchdev mode is not supported with LAG,
it's unnecessary to modify those sent-to-vport rules to the peer eswitch.

Fixes: c6c2bf5db4ea ("net/mlx5e: Support IPsec packet offload for TX in switchdev mode")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-9-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Vlad Buslov
0c101a23ca net/mlx5e: Fix pedit endianness
Referenced commit addressed endianness issue in mlx5 pedit implementation
in ad hoc manner instead of systematically treating integer values
according to their types which left pedit fields of sizes not equal to 4
and where the bytes being modified are not least significant ones broken on
big endian machines since wrong bits will be consumed during parsing which
leads to following example error when applying pedit to source and
destination MAC addresses:

[Wed Oct 18 12:52:42 2023] mlx5_core 0001:00:00.1 p1v3_r: attempt to offload an unsupported field (cmd 0)
[Wed Oct 18 12:52:42 2023] mask: 00000000330c5b68: 00 00 00 00 ff ff 00 00 00 00 ff ff 00 00 00 00  ................
[Wed Oct 18 12:52:42 2023] mask: 0000000017d22fd9: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[Wed Oct 18 12:52:42 2023] mask: 000000008186d717: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[Wed Oct 18 12:52:42 2023] mask: 0000000029eb6149: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[Wed Oct 18 12:52:42 2023] mask: 000000007ed103e4: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[Wed Oct 18 12:52:42 2023] mask: 00000000db8101a6: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[Wed Oct 18 12:52:42 2023] mask: 00000000ec3c08a9: 00 00 00 00 00 00 00 00 00 00 00 00              ............

Treat masks and values of pedit and filter match as network byte order,
refactor pointers to them to void pointers instead of confusing u32
pointers and only cast to pointer-to-integer when reading a value from
them. Treat pedit mlx5_fields->field_mask as host byte order according to
its type u32, change the constants in fields array accordingly.

Fixes: 82198d8bcdef ("net/mlx5e: Fix endianness when calculating pedit mask first bit")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-8-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Gavin Li
3a4aa3cb83 net/mlx5e: fix double free of encap_header in update funcs
Follow up to the previous patch to fix the same issue for
mlx5e_tc_tun_update_header_ipv4{6} when mlx5_packet_reformat_alloc()
fails.

When mlx5_packet_reformat_alloc() fails, the encap_header allocated in
mlx5e_tc_tun_update_header_ipv4{6} will be released within it. However,
e->encap_header is already set to the previously freed encap_header
before mlx5_packet_reformat_alloc(). As a result, the later
mlx5e_encap_put() will free e->encap_header again, causing a double free
issue.

mlx5e_encap_put()
     --> mlx5e_encap_dealloc()
         --> kfree(e->encap_header)

This patch fix it by not setting e->encap_header until
mlx5_packet_reformat_alloc() success.

Fixes: a54e20b4fcae ("net/mlx5e: Add basic TC tunnel set action for SRIOV offloads")
Signed-off-by: Gavin Li <gavinl@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-7-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:31 -08:00
Dust Li
6f9b1a0731 net/mlx5e: fix double free of encap_header
When mlx5_packet_reformat_alloc() fails, the encap_header allocated in
mlx5e_tc_tun_create_header_ipv4{6} will be released within it. However,
e->encap_header is already set to the previously freed encap_header
before mlx5_packet_reformat_alloc(). As a result, the later
mlx5e_encap_put() will free e->encap_header again, causing a double free
issue.

mlx5e_encap_put()
    --> mlx5e_encap_dealloc()
        --> kfree(e->encap_header)

This happens when cmd: MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT fail.

This patch fix it by not setting e->encap_header until
mlx5_packet_reformat_alloc() success.

Fixes: d589e785baf5e ("net/mlx5e: Allow concurrent creation of encap entries")
Reported-by: Cruz Zhao <cruzzhao@linux.alibaba.com>
Reported-by: Tianchen Ding <dtcccc@linux.alibaba.com>
Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:25 -08:00
Rahul Rameshbabu
fd64fd13c4 net/mlx5: Decouple PHC .adjtime and .adjphase implementations
When running a phase adjustment operation, the free running clock should
not be modified at all. The phase control keyword is intended to trigger an
internal servo on the device that will converge to the provided delta. A
free running counter cannot implement phase adjustment.

Fixes: 8e11a68e2e8a ("net/mlx5: Add adjphase function to support hardware-only offset control")
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-5-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:09 -08:00
Erez Shitrit
ad4d82c3ea net/mlx5: DR, Allow old devices to use multi destination FTE
The current check isn't aware of old devices that don't have the
relevant FW capability. This patch allows multi destination FTE
in old cards, as it was before this check.

Fixes: f6f46e7173cb ("net/mlx5: DR, Add check for multi destination FTE")
Signed-off-by: Erez Shitrit <erezsh@nvidia.com>
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-4-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:09 -08:00
Maher Sanalla
7d2f74d1d4 net/mlx5: Free used cpus mask when an IRQ is released
Each EQ table maintains a cpumask of the already used CPUs that are mapped
to IRQs to ensure that each IRQ gets mapped to a unique CPU.

However, on IRQ release, the said cpumask is not updated by clearing the
CPU from the mask to allow future IRQ request, causing the following
error when a SF is reloaded after it has utilized all CPUs for its IRQs:

mlx5_irq_affinity_request:135:(pid 306010): Didn't find a matching IRQ.
err = -28

Thus, when releasing an IRQ, clear its mapped CPU from the used CPUs
mask, to prevent the case described above.

While at it, move the used cpumask update to the EQ layer as it is more
fitting and preserves symmetricity of the IRQ request/release API.

Fixes: a1772de78d73 ("net/mlx5: Refactor completion IRQ request/release API")
Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-3-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:09 -08:00
Itamar Gozlan
df3aafe501 Revert "net/mlx5: DR, Supporting inline WQE when possible"
This reverts commit 95c337cce0e11d06a715da73e6796ade9216637f.
The revert is required due to the suspicion it cause some tests
fail and will be moved to further investigation.

Fixes: 95c337cce0e1 ("net/mlx5: DR, Supporting inline WQE when possible")
Signed-off-by: Itamar Gozlan <igozlan@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20231114215846.5902-2-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:34:08 -08:00
Jakub Kicinski
a6a6a0a9fd Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Alexei Starovoitov says:

====================
pull-request: bpf 2023-11-15

We've added 7 non-merge commits during the last 6 day(s) which contain
a total of 9 files changed, 200 insertions(+), 49 deletions(-).

The main changes are:

1) Do not allocate bpf specific percpu memory unconditionally, from Yonghong.

2) Fix precision backtracking instruction iteration, from Andrii.

3) Fix control flow graph checking, from Andrii.

4) Fix xskxceiver selftest build, from Anders.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Do not allocate percpu memory at init stage
  selftests/bpf: add more test cases for check_cfg()
  bpf: fix control-flow graph checking in privileged mode
  selftests/bpf: add edge case backtracking logic test
  bpf: fix precision backtracking instruction iteration
  bpf: handle ldimm64 properly in check_cfg()
  selftests: bpf: xskxceiver: ksft_print_msg: fix format type error
====================

Link: https://lore.kernel.org/r/20231115214949.48854-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-15 22:28:02 -08:00
Yonghong Song
1fda5bb66a bpf: Do not allocate percpu memory at init stage
Kirill Shutemov reported significant percpu memory consumption increase after
booting in 288-cpu VM ([1]) due to commit 41a5db8d8161 ("bpf: Add support for
non-fix-size percpu mem allocation"). The percpu memory consumption is
increased from 111MB to 969MB. The number is from /proc/meminfo.

I tried to reproduce the issue with my local VM which at most supports upto
255 cpus. With 252 cpus, without the above commit, the percpu memory
consumption immediately after boot is 57MB while with the above commit the
percpu memory consumption is 231MB.

This is not good since so far percpu memory from bpf memory allocator is not
widely used yet. Let us change pre-allocation in init stage to on-demand
allocation when verifier detects there is a need of percpu memory for bpf
program. With this change, percpu memory consumption after boot can be reduced
signicantly.

  [1] https://lore.kernel.org/lkml/20231109154934.4saimljtqx625l3v@box.shutemov.name/

Fixes: 41a5db8d8161 ("bpf: Add support for non-fix-size percpu mem allocation")
Reported-and-tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Acked-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20231111013928.948838-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-11-15 07:51:06 -08:00
Gal Pressman
674e318089 net: Fix undefined behavior in netdev name allocation
Cited commit removed the strscpy() call and kept the snprintf() only.

It is common to use 'dev->name' as the format string before a netdev is
registered, this results in 'res' and 'name' pointers being equal.
According to POSIX, if copying takes place between objects that overlap
as a result of a call to sprintf() or snprintf(), the results are
undefined.

Add back the strscpy() and use 'buf' as an intermediate buffer.

Fixes: 7ad17b04dc7b ("net: trust the bitmap in __dev_alloc_name()")
Cc: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-15 11:05:08 +00:00
Niklas Söderlund
efc0c8363b dt-bindings: net: ethernet-controller: Fix formatting error
When moving the *-internal-delay-ps properties to only apply for RGMII
interface modes there where a typo in the text formatting.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-15 10:00:55 +00:00
Johnathan Mantey
9e2e7efbbb Revert ncsi: Propagate carrier gain/loss events to the NCSI controller
This reverts commit 3780bb29311eccb7a1c9641032a112eed237f7e3.

The cited commit introduced unwanted behavior.

The intent for the commit was to be able to detect carrier loss/gain
for just the NIC connected to the BMC. The unwanted effect is a
carrier loss for auxiliary paths also causes the BMC to lose
carrier. The BMC never regains carrier despite the secondary NIC
regaining a link.

This change, when merged, needs to be backported to stable kernels.
5.4-stable, 5.10-stable, 5.15-stable, 6.1-stable, 6.5-stable

Fixes: 3780bb29311e ("ncsi: Propagate carrier gain/loss events to the NCSI controller")
CC: stable@vger.kernel.org
Signed-off-by: Johnathan Mantey <johnathanx.mantey@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-15 09:59:44 +00:00
Juergen Gross
cee96422e8 xen/events: remove some info_for_irq() calls in pirq handling
Instead of the IRQ number user the struct irq_info pointer as parameter
in the internal pirq related functions. This allows to drop some calls
of info_for_irq().

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-15 09:51:51 +01:00
Juergen Gross
3fcdaf3d76 xen/events: modify internal [un]bind interfaces
Modify the internal bind- and unbind-interfaces to take a struct
irq_info parameter. When allocating a new IRQ pass the pointer from
the allocating function further up.

This will reduce the number of info_for_irq() calls and make the code
more efficient.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-15 09:29:39 +01:00
Juergen Gross
5dd9ad32d7 xen/events: drop xen_allocate_irqs_dynamic()
Instead of having a common function for allocating a single IRQ or a
consecutive number of IRQs, split up the functionality into the callers
of xen_allocate_irqs_dynamic().

This allows to handle any allocation error in xen_irq_init() gracefully
instead of panicing the system. Let xen_irq_init() return the irq_info
pointer or NULL in case of an allocation error.

Additionally set the IRQ into irq_info already at allocation time, as
otherwise the IRQ would be '0' (which is a valid IRQ number) until
being set.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-15 09:02:59 +01:00
Jakub Kicinski
a133eae83a Merge branch 'mptcp-misc-fixes-for-v6-7'
Matthieu Baerts says:

====================
mptcp: misc. fixes for v6.7

Here are a few fixes related to MPTCP:

- Patch 1 limits GSO max size to ~64K when MPTCP is being used due to a
  spec limit. 'gso_max_size' can exceed the max value supported by MPTCP
  since v5.19.

- Patch 2 fixes a possible NULL pointer dereference on close that can
  happen since v6.7-rc1.

- Patch 3 avoids sending a RM_ADDR when the corresponding address is no
  longer tracked locally. A regression for a fix backported to v5.19.

- Patch 4 adds a missing lock when changing the IP TOS with setsockopt().
  A fix for v5.17.

- Patch 5 fixes an expectation when running MPTCP Join selftest with the
  checksum option (-C). An issue present since v6.1.
====================

Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-0-7b9cd6a7b7f4@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 20:10:45 -08:00
Paolo Abeni
7cefbe5e1d selftests: mptcp: fix fastclose with csum failure
Running the mp_join selftest manually with the following command line:

  ./mptcp_join.sh -z -C

leads to some failures:

  002 fastclose server test
  # ...
  rtx                                 [fail] got 1 MP_RST[s] TX expected 0
  # ...
  rstrx                               [fail] got 1 MP_RST[s] RX expected 0

The problem is really in the wrong expectations for the RST checks
implied by the csum validation. Note that the same check is repeated
explicitly in the same test-case, with the correct expectation and
pass successfully.

Address the issue explicitly setting the correct expectation for
the failing checks.

Reported-by: Xiumei Mu <xmu@redhat.com>
Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-5-7b9cd6a7b7f4@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 20:10:21 -08:00
Paolo Abeni
7679d34f97 mptcp: fix setsockopt(IP_TOS) subflow locking
The MPTCP implementation of the IP_TOS socket option uses the lockless
variant of the TOS manipulation helper and does not hold such lock at
the helper invocation time.

Add the required locking.

Fixes: ffcacff87cd6 ("mptcp: Support for IP_TOS for MPTCP setsockopt()")
Cc: stable@vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/457
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-4-7b9cd6a7b7f4@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 20:10:20 -08:00
Geliang Tang
8df220b292 mptcp: add validity check for sending RM_ADDR
This patch adds the validity check for sending RM_ADDRs for userspace PM
in mptcp_pm_remove_addrs(), only send a RM_ADDR when the address is in the
anno_list or conn_list.

Fixes: 8b1c94da1e48 ("mptcp: only send RM_ADDR in nl_cmd_remove")
Cc: stable@vger.kernel.org
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-3-7b9cd6a7b7f4@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 20:10:20 -08:00
Paolo Abeni
d109a77672 mptcp: fix possible NULL pointer dereference on close
After the blamed commit below, the MPTCP release callback can
dereference the first subflow pointer via __mptcp_set_connected()
and send buffer auto-tuning. Such pointer is always expected to be
valid, except at socket destruction time, when the first subflow is
deleted and the pointer zeroed.

If the connect event is handled by the release callback while the
msk socket is finally released, MPTCP hits the following splat:

  general protection fault, probably for non-canonical address 0xdffffc00000000f2: 0000 [#1] PREEMPT SMP KASAN
  KASAN: null-ptr-deref in range [0x0000000000000790-0x0000000000000797]
  CPU: 1 PID: 26719 Comm: syz-executor.2 Not tainted 6.6.0-syzkaller-10102-gff269e2cd5ad #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023
  RIP: 0010:mptcp_subflow_ctx net/mptcp/protocol.h:542 [inline]
  RIP: 0010:__mptcp_propagate_sndbuf net/mptcp/protocol.h:813 [inline]
  RIP: 0010:__mptcp_set_connected+0x57/0x3e0 net/mptcp/subflow.c:424
  RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8a62323c
  RDX: 00000000000000f2 RSI: ffffffff8a630116 RDI: 0000000000000790
  RBP: ffff88803334b100 R08: 0000000000000001 R09: 0000000000000000
  R10: 0000000000000001 R11: 0000000000000034 R12: ffff88803334b198
  R13: ffff888054f0b018 R14: 0000000000000000 R15: ffff88803334b100
  FS:  0000000000000000(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fbcb4f75198 CR3: 000000006afb5000 CR4: 00000000003506f0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   <TASK>
   mptcp_release_cb+0xa2c/0xc40 net/mptcp/protocol.c:3405
   release_sock+0xba/0x1f0 net/core/sock.c:3537
   mptcp_close+0x32/0xf0 net/mptcp/protocol.c:3084
   inet_release+0x132/0x270 net/ipv4/af_inet.c:433
   inet6_release+0x4f/0x70 net/ipv6/af_inet6.c:485
   __sock_release+0xae/0x260 net/socket.c:659
   sock_close+0x1c/0x20 net/socket.c:1419
   __fput+0x270/0xbb0 fs/file_table.c:394
   task_work_run+0x14d/0x240 kernel/task_work.c:180
   exit_task_work include/linux/task_work.h:38 [inline]
   do_exit+0xa92/0x2a20 kernel/exit.c:876
   do_group_exit+0xd4/0x2a0 kernel/exit.c:1026
   get_signal+0x23ba/0x2790 kernel/signal.c:2900
   arch_do_signal_or_restart+0x90/0x7f0 arch/x86/kernel/signal.c:309
   exit_to_user_mode_loop kernel/entry/common.c:168 [inline]
   exit_to_user_mode_prepare+0x11f/0x240 kernel/entry/common.c:204
   __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
   syscall_exit_to_user_mode+0x1d/0x60 kernel/entry/common.c:296
   do_syscall_64+0x4b/0x110 arch/x86/entry/common.c:88
   entry_SYSCALL_64_after_hwframe+0x63/0x6b
  RIP: 0033:0x7fb515e7cae9
  Code: Unable to access opcode bytes at 0x7fb515e7cabf.
  RSP: 002b:00007fb516c560c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
  RAX: 000000000000003c RBX: 00007fb515f9c120 RCX: 00007fb515e7cae9
  RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000006
  RBP: 00007fb515ec847a R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
  R13: 000000000000006e R14: 00007fb515f9c120 R15: 00007ffc631eb968
   </TASK>

To avoid sparkling unneeded conditionals, address the issue explicitly
checking msk->first only in the critical place.

Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning")
Cc: stable@vger.kernel.org
Reported-by: <syzbot+9dfbaedb6e6baca57a32@syzkaller.appspotmail.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/454
Reported-by: Eric Dumazet <edumazet@google.com>
Closes: https://lore.kernel.org/netdev/CANn89iLZUA6S2a=K8GObnS62KK6Jt4B7PsAs7meMFooM8xaTgw@mail.gmail.com/
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-2-7b9cd6a7b7f4@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 20:10:20 -08:00
Paolo Abeni
9fce92f050 mptcp: deal with large GSO size
After the blamed commit below, the TCP sockets (and the MPTCP subflows)
can build egress packets larger than 64K. That exceeds the maximum DSS
data size, the length being misrepresent on the wire and the stream being
corrupted, as later observed on the receiver:

  WARNING: CPU: 0 PID: 9696 at net/mptcp/protocol.c:705 __mptcp_move_skbs_from_subflow+0x2604/0x26e0
  CPU: 0 PID: 9696 Comm: syz-executor.7 Not tainted 6.6.0-rc5-gcd8bdf563d46 #45
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
  netlink: 8 bytes leftover after parsing attributes in process `syz-executor.4'.
  RIP: 0010:__mptcp_move_skbs_from_subflow+0x2604/0x26e0 net/mptcp/protocol.c:705
  RSP: 0018:ffffc90000006e80 EFLAGS: 00010246
  RAX: ffffffff83e9f674 RBX: ffff88802f45d870 RCX: ffff888102ad0000
  netlink: 8 bytes leftover after parsing attributes in process `syz-executor.4'.
  RDX: 0000000080000303 RSI: 0000000000013908 RDI: 0000000000003908
  RBP: ffffc90000007110 R08: ffffffff83e9e078 R09: 1ffff1100e548c8a
  R10: dffffc0000000000 R11: ffffed100e548c8b R12: 0000000000013908
  R13: dffffc0000000000 R14: 0000000000003908 R15: 000000000031cf29
  FS:  00007f239c47e700(0000) GS:ffff88811b200000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f239c45cd78 CR3: 000000006a66c006 CR4: 0000000000770ef0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
  PKRU: 55555554
  Call Trace:
   <IRQ>
   mptcp_data_ready+0x263/0xac0 net/mptcp/protocol.c:819
   subflow_data_ready+0x268/0x6d0 net/mptcp/subflow.c:1409
   tcp_data_queue+0x21a1/0x7a60 net/ipv4/tcp_input.c:5151
   tcp_rcv_established+0x950/0x1d90 net/ipv4/tcp_input.c:6098
   tcp_v6_do_rcv+0x554/0x12f0 net/ipv6/tcp_ipv6.c:1483
   tcp_v6_rcv+0x2e26/0x3810 net/ipv6/tcp_ipv6.c:1749
   ip6_protocol_deliver_rcu+0xd6b/0x1ae0 net/ipv6/ip6_input.c:438
   ip6_input+0x1c5/0x470 net/ipv6/ip6_input.c:483
   ipv6_rcv+0xef/0x2c0 include/linux/netfilter.h:304
   __netif_receive_skb+0x1ea/0x6a0 net/core/dev.c:5532
   process_backlog+0x353/0x660 net/core/dev.c:5974
   __napi_poll+0xc6/0x5a0 net/core/dev.c:6536
   net_rx_action+0x6a0/0xfd0 net/core/dev.c:6603
   __do_softirq+0x184/0x524 kernel/softirq.c:553
   do_softirq+0xdd/0x130 kernel/softirq.c:454

Address the issue explicitly bounding the maximum GSO size to what MPTCP
actually allows.

Reported-by: Christoph Paasch <cpaasch@apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/450
Fixes: 7c4e983c4f3c ("net: allow gso_max_size to exceed 65536")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-1-7b9cd6a7b7f4@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 20:10:20 -08:00
Ziwei Xiao
278a370c17 gve: Fixes for napi_poll when budget is 0
Netpoll will explicilty pass the polling call with a budget of 0 to
indicate it's clearing the Tx path only. For the gve_rx_poll and
gve_xdp_poll, they were mistakenly taking the 0 budget as the indication
to do all the work. Add check to avoid the rx path and xdp path being
called when budget is 0. And also avoid napi_complete_done being called
when budget is 0 for netpoll.

Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
Signed-off-by: Ziwei Xiao <ziweixiao@google.com>
Link: https://lore.kernel.org/r/20231114004144.2022268-1-ziweixiao@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 20:00:11 -08:00
Jakub Kicinski
67af0bdcd6 Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2023-11-13 (ice)

This series contains updates to ice driver only.

Arkadiusz ensures the device is initialized with valid lock status
value. He also removes range checking of dpll priority to allow firmware
to process the request; supported values are firmware dependent.
Finally, he removes setting of can change capability for pins that
cannot be changed.

Dan restores ability to load a package which doesn't contain a signature
segment.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  ice: fix DDP package download for packages without signature segment
  ice: dpll: fix output pin capabilities
  ice: dpll: fix check for dpll input priority range
  ice: dpll: fix initial lock status of dpll
====================

Link: https://lore.kernel.org/r/20231113230551.548489-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 19:56:30 -08:00
Jakub Kicinski
9d350b2b0d Merge branch 'pds_core-fix-irq-index-bug-and-compiler-warnings'
Shannon Nelson says:

====================
pds_core: fix irq index bug and compiler warnings

The first patch fixes a bug in our interrupt masking where we used the
wrong index.  The second patch addresses a couple of kernel test robot
string truncation warnings.
====================

Link: https://lore.kernel.org/r/20231113183257.71110-1-shannon.nelson@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 19:52:14 -08:00
Shannon Nelson
7c02f6ae67 pds_core: fix up some format-truncation complaints
Our friendly kernel test robot pointed out a couple of potential
string truncation issues.  None of which were we worried about,
but can be relatively easily fixed to quiet the complaints.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202310211736.66syyDpp-lkp@intel.com/
Fixes: 45d76f492938 ("pds_core: set up device and adminq")
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Link: https://lore.kernel.org/r/20231113183257.71110-3-shannon.nelson@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 19:52:09 -08:00
Shannon Nelson
09d4c14c6c pds_core: use correct index to mask irq
Use the qcq's interrupt index, not the irq number, to mask
the interrupt.  Since the irq number can be out of range from
the number of possible interrupts, we can end up accessing
and potentially scribbling on out-of-range and/or unmapped
memory, making the kernel angry.

    [ 3116.039364] BUG: unable to handle page fault for address: ffffbeea1c3edf84
    [ 3116.047059] #PF: supervisor write access in kernel mode
    [ 3116.052895] #PF: error_code(0x0002) - not-present page
    [ 3116.058636] PGD 100000067 P4D 100000067 PUD 1001f2067 PMD 10f82e067 PTE 0
    [ 3116.066221] Oops: 0002 [#1] SMP NOPTI
    [ 3116.092948] RIP: 0010:iowrite32+0x9/0x76
    [ 3116.190452] Call Trace:
    [ 3116.193185]  <IRQ>
    [ 3116.195430]  ? show_trace_log_lvl+0x1d6/0x2f9
    [ 3116.200298]  ? show_trace_log_lvl+0x1d6/0x2f9
    [ 3116.205166]  ? pdsc_adminq_isr+0x43/0x55 [pds_core]
    [ 3116.210618]  ? __die_body.cold+0x8/0xa
    [ 3116.214806]  ? page_fault_oops+0x16d/0x1ac
    [ 3116.219382]  ? exc_page_fault+0xbe/0x13b
    [ 3116.223764]  ? asm_exc_page_fault+0x22/0x27
    [ 3116.228440]  ? iowrite32+0x9/0x76
    [ 3116.232143]  pdsc_adminq_isr+0x43/0x55 [pds_core]
    [ 3116.237627]  __handle_irq_event_percpu+0x3a/0x184
    [ 3116.243088]  handle_irq_event+0x57/0xb0
    [ 3116.247575]  handle_edge_irq+0x87/0x225
    [ 3116.252062]  __common_interrupt+0x3e/0xbc
    [ 3116.256740]  common_interrupt+0x7b/0x98
    [ 3116.261216]  </IRQ>
    [ 3116.263745]  <TASK>
    [ 3116.266268]  asm_common_interrupt+0x22/0x27

Reported-by: Joao Martins <joao.m.martins@oracle.com>
Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands")
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Link: https://lore.kernel.org/r/20231113183257.71110-2-shannon.nelson@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 19:52:09 -08:00
Alex Pakhunov
17dd5efe5f tg3: Increment tx_dropped in tg3_tso_bug()
tg3_tso_bug() drops a packet if it cannot be segmented for any reason.
The number of discarded frames should be incremented accordingly.

Signed-off-by: Alex Pakhunov <alexey.pakhunov@spacex.com>
Signed-off-by: Vincent Wong <vincent.wong2@spacex.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Link: https://lore.kernel.org/r/20231113182350.37472-2-alexey.pakhunov@spacex.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 19:50:23 -08:00
Alex Pakhunov
907d1bdb8b tg3: Move the [rt]x_dropped counters to tg3_napi
This change moves [rt]x_dropped counters to tg3_napi so that they can be
updated by a single writer, race-free.

Signed-off-by: Alex Pakhunov <alexey.pakhunov@spacex.com>
Signed-off-by: Vincent Wong <vincent.wong2@spacex.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Link: https://lore.kernel.org/r/20231113182350.37472-1-alexey.pakhunov@spacex.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 19:50:23 -08:00
Baruch Siach
b6cb454185 net: stmmac: avoid rx queue overrun
dma_rx_size can be set as low as 64. Rx budget might be higher than
that. Make sure to not overrun allocated rx buffers when budget is
larger.

Leave one descriptor unused to avoid wrap around of 'dirty_rx' vs
'cur_rx'.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
Fixes: 47dd7a540b8a ("net: add support for STMicroelectronics Ethernet controllers.")
Link: https://lore.kernel.org/r/d95413e44c97d4692e72cec13a75f894abeb6998.1699897370.git.baruch@tkos.co.il
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 19:48:03 -08:00
Baruch Siach
fa02de9e75 net: stmmac: fix rx budget limit check
The while loop condition verifies 'count < limit'. Neither value change
before the 'count >= limit' check. As is this check is dead code. But
code inspection reveals a code path that modifies 'count' and then goto
'drain_data' and back to 'read_again'. So there is a need to verify
count value sanity after 'read_again'.

Move 'read_again' up to fix the count limit check.

Fixes: ec222003bd94 ("net: stmmac: Prepare to add Split Header support")
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
Link: https://lore.kernel.org/r/d9486296c3b6b12ab3a0515fcd47d56447a07bfc.1699897370.git.baruch@tkos.co.il
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-14 19:48:02 -08:00
Pablo Neira Ayuso
8837ba3e58 netfilter: nf_tables: split async and sync catchall in two functions
list_for_each_entry_safe() does not work for the async case which runs
under RCU, therefore, split GC logic for catchall in two functions
instead, one for each of the sync and async GC variants.

The catchall sync GC variant never sees a _DEAD bit set on ever, thus,
this handling is removed in such case, moreover, allocate GC sync batch
via GFP_KERNEL.

Fixes: 93995bf4af2c ("netfilter: nf_tables: remove catchall element in GC sync path")
Reported-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2023-11-14 16:16:21 +01:00
Jozsef Kadlecsik
28628fa952 netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test
Linkui Xiao reported that there's a race condition when ipset swap and destroy is
called, which can lead to crash in add/del/test element operations. Swap then
destroy are usual operations to replace a set with another one in a production
system. The issue can in some cases be reproduced with the script:

ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576
ipset add hash_ip1 172.20.0.0/16
ipset add hash_ip1 192.168.0.0/16
iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT
while [ 1 ]
do
	# ... Ongoing traffic...
        ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576
        ipset add hash_ip2 172.20.0.0/16
        ipset swap hash_ip1 hash_ip2
        ipset destroy hash_ip2
        sleep 0.05
done

In the race case the possible order of the operations are

	CPU0			CPU1
	ip_set_test
				ipset swap hash_ip1 hash_ip2
				ipset destroy hash_ip2
	hash_net_kadt

Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which
is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy
removed it, hash_net_kadt crashes.

The fix is to force ip_set_swap() to wait for all readers to finish accessing the
old set pointers by calling synchronize_rcu().

The first version of the patch was written by Linkui Xiao <xiaolinkui@kylinos.cn>.

v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden
    ip_set_destroy() unnecessarily when all sets are destroyed.
v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held
    and em_ipset.c wraps the entire ip_set_test() in rcu read lock/unlock pair.
    So there's no need to extend the rcu read locked area in ipset itself.

Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/
Reported by: Linkui Xiao <xiaolinkui@kylinos.cn>
Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2023-11-14 16:16:21 +01:00
Pablo Neira Ayuso
a7d5a955bf netfilter: nf_tables: bogus ENOENT when destroying element which does not exist
destroy element command bogusly reports ENOENT in case a set element
does not exist. ENOENT errors are skipped, however, err is still set
and propagated to userspace.

 # nft destroy element ip raw BLACKLIST { 1.2.3.4 }
 Error: Could not process rule: No such file or directory
 destroy element ip raw BLACKLIST { 1.2.3.4 }
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Fixes: f80a612dd77c ("netfilter: nf_tables: add support to destroy operation")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2023-11-14 16:16:21 +01:00
Dan Carpenter
c301f0981f netfilter: nf_tables: fix pointer math issue in nft_byteorder_eval()
The problem is in nft_byteorder_eval() where we are iterating through a
loop and writing to dst[0], dst[1], dst[2] and so on...  On each
iteration we are writing 8 bytes.  But dst[] is an array of u32 so each
element only has space for 4 bytes.  That means that every iteration
overwrites part of the previous element.

I spotted this bug while reviewing commit caf3ef7468f7 ("netfilter:
nf_tables: prevent OOB access in nft_byteorder_eval") which is a related
issue.  I think that the reason we have not detected this bug in testing
is that most of time we only write one element.

Fixes: ce1e7989d989 ("netfilter: nft_byteorder: provide 64bit le/be conversion")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2023-11-14 16:16:21 +01:00
Linkui Xiao
a44af08e3d netfilter: nf_conntrack_bridge: initialize err to 0
K2CI reported a problem:

	consume_skb(skb);
	return err;
[nf_br_ip_fragment() error]  uninitialized symbol 'err'.

err is not initialized, because returning 0 is expected, initialize err
to 0.

Fixes: 3c171f496ef5 ("netfilter: bridge: add connection tracking system")
Reported-by: k2ci <kernel-bot@kylinos.cn>
Signed-off-by: Linkui Xiao <xiaolinkui@kylinos.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2023-11-14 16:16:21 +01:00
Yang Li
67059b6159 netfilter: nft_set_rbtree: Remove unused variable nft_net
The code that uses nft_net has been removed, and the nft_pernet function
is merely obtaining a reference to shared data through the net pointer.
The content of the net pointer is not modified or changed, so both of
them should be removed.

silence the warning:
net/netfilter/nft_set_rbtree.c:627:26: warning: variable ‘nft_net’ set but not used

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7103
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2023-11-14 16:10:20 +01:00
Eric Dumazet
4b7b492615 af_unix: fix use-after-free in unix_stream_read_actor()
syzbot reported the following crash [1]

After releasing unix socket lock, u->oob_skb can be changed
by another thread. We must temporarily increase skb refcount
to make sure this other thread will not free the skb under us.

[1]

BUG: KASAN: slab-use-after-free in unix_stream_read_actor+0xa7/0xc0 net/unix/af_unix.c:2866
Read of size 4 at addr ffff88801f3b9cc4 by task syz-executor107/5297

CPU: 1 PID: 5297 Comm: syz-executor107 Not tainted 6.6.0-syzkaller-15910-gb8e3a87a627b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
print_address_description mm/kasan/report.c:364 [inline]
print_report+0xc4/0x620 mm/kasan/report.c:475
kasan_report+0xda/0x110 mm/kasan/report.c:588
unix_stream_read_actor+0xa7/0xc0 net/unix/af_unix.c:2866
unix_stream_recv_urg net/unix/af_unix.c:2587 [inline]
unix_stream_read_generic+0x19a5/0x2480 net/unix/af_unix.c:2666
unix_stream_recvmsg+0x189/0x1b0 net/unix/af_unix.c:2903
sock_recvmsg_nosec net/socket.c:1044 [inline]
sock_recvmsg+0xe2/0x170 net/socket.c:1066
____sys_recvmsg+0x21f/0x5c0 net/socket.c:2803
___sys_recvmsg+0x115/0x1a0 net/socket.c:2845
__sys_recvmsg+0x114/0x1e0 net/socket.c:2875
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0x6b
RIP: 0033:0x7fc67492c559
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fc6748ab228 EFLAGS: 00000246 ORIG_RAX: 000000000000002f
RAX: ffffffffffffffda RBX: 000000000000001c RCX: 00007fc67492c559
RDX: 0000000040010083 RSI: 0000000020000140 RDI: 0000000000000004
RBP: 00007fc6749b6348 R08: 00007fc6748ab6c0 R09: 00007fc6748ab6c0
R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc6749b6340
R13: 00007fc6749b634c R14: 00007ffe9fac52a0 R15: 00007ffe9fac5388
</TASK>

Allocated by task 5295:
kasan_save_stack+0x33/0x50 mm/kasan/common.c:45
kasan_set_track+0x25/0x30 mm/kasan/common.c:52
__kasan_slab_alloc+0x81/0x90 mm/kasan/common.c:328
kasan_slab_alloc include/linux/kasan.h:188 [inline]
slab_post_alloc_hook mm/slab.h:763 [inline]
slab_alloc_node mm/slub.c:3478 [inline]
kmem_cache_alloc_node+0x180/0x3c0 mm/slub.c:3523
__alloc_skb+0x287/0x330 net/core/skbuff.c:641
alloc_skb include/linux/skbuff.h:1286 [inline]
alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331
sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780
sock_alloc_send_skb include/net/sock.h:1884 [inline]
queue_oob net/unix/af_unix.c:2147 [inline]
unix_stream_sendmsg+0xb5f/0x10a0 net/unix/af_unix.c:2301
sock_sendmsg_nosec net/socket.c:730 [inline]
__sock_sendmsg+0xd5/0x180 net/socket.c:745
____sys_sendmsg+0x6ac/0x940 net/socket.c:2584
___sys_sendmsg+0x135/0x1d0 net/socket.c:2638
__sys_sendmsg+0x117/0x1e0 net/socket.c:2667
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0x6b

Freed by task 5295:
kasan_save_stack+0x33/0x50 mm/kasan/common.c:45
kasan_set_track+0x25/0x30 mm/kasan/common.c:52
kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:522
____kasan_slab_free mm/kasan/common.c:236 [inline]
____kasan_slab_free+0x15b/0x1b0 mm/kasan/common.c:200
kasan_slab_free include/linux/kasan.h:164 [inline]
slab_free_hook mm/slub.c:1800 [inline]
slab_free_freelist_hook+0x114/0x1e0 mm/slub.c:1826
slab_free mm/slub.c:3809 [inline]
kmem_cache_free+0xf8/0x340 mm/slub.c:3831
kfree_skbmem+0xef/0x1b0 net/core/skbuff.c:1015
__kfree_skb net/core/skbuff.c:1073 [inline]
consume_skb net/core/skbuff.c:1288 [inline]
consume_skb+0xdf/0x170 net/core/skbuff.c:1282
queue_oob net/unix/af_unix.c:2178 [inline]
unix_stream_sendmsg+0xd49/0x10a0 net/unix/af_unix.c:2301
sock_sendmsg_nosec net/socket.c:730 [inline]
__sock_sendmsg+0xd5/0x180 net/socket.c:745
____sys_sendmsg+0x6ac/0x940 net/socket.c:2584
___sys_sendmsg+0x135/0x1d0 net/socket.c:2638
__sys_sendmsg+0x117/0x1e0 net/socket.c:2667
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0x6b

The buggy address belongs to the object at ffff88801f3b9c80
which belongs to the cache skbuff_head_cache of size 240
The buggy address is located 68 bytes inside of
freed 240-byte region [ffff88801f3b9c80, ffff88801f3b9d70)

The buggy address belongs to the physical page:
page:ffffea00007cee40 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1f3b9
flags: 0xfff00000000800(slab|node=0|zone=1|lastcpupid=0x7ff)
page_type: 0xffffffff()
raw: 00fff00000000800 ffff888142a60640 dead000000000122 0000000000000000
raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 5299, tgid 5283 (syz-executor107), ts 103803840339, free_ts 103600093431
set_page_owner include/linux/page_owner.h:31 [inline]
post_alloc_hook+0x2cf/0x340 mm/page_alloc.c:1537
prep_new_page mm/page_alloc.c:1544 [inline]
get_page_from_freelist+0xa25/0x36c0 mm/page_alloc.c:3312
__alloc_pages+0x1d0/0x4a0 mm/page_alloc.c:4568
alloc_pages_mpol+0x258/0x5f0 mm/mempolicy.c:2133
alloc_slab_page mm/slub.c:1870 [inline]
allocate_slab+0x251/0x380 mm/slub.c:2017
new_slab mm/slub.c:2070 [inline]
___slab_alloc+0x8c7/0x1580 mm/slub.c:3223
__slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3322
__slab_alloc_node mm/slub.c:3375 [inline]
slab_alloc_node mm/slub.c:3468 [inline]
kmem_cache_alloc_node+0x132/0x3c0 mm/slub.c:3523
__alloc_skb+0x287/0x330 net/core/skbuff.c:641
alloc_skb include/linux/skbuff.h:1286 [inline]
alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331
sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780
sock_alloc_send_skb include/net/sock.h:1884 [inline]
queue_oob net/unix/af_unix.c:2147 [inline]
unix_stream_sendmsg+0xb5f/0x10a0 net/unix/af_unix.c:2301
sock_sendmsg_nosec net/socket.c:730 [inline]
__sock_sendmsg+0xd5/0x180 net/socket.c:745
____sys_sendmsg+0x6ac/0x940 net/socket.c:2584
___sys_sendmsg+0x135/0x1d0 net/socket.c:2638
__sys_sendmsg+0x117/0x1e0 net/socket.c:2667
page last free stack trace:
reset_page_owner include/linux/page_owner.h:24 [inline]
free_pages_prepare mm/page_alloc.c:1137 [inline]
free_unref_page_prepare+0x4f8/0xa90 mm/page_alloc.c:2347
free_unref_page+0x33/0x3b0 mm/page_alloc.c:2487
__unfreeze_partials+0x21d/0x240 mm/slub.c:2655
qlink_free mm/kasan/quarantine.c:168 [inline]
qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187
kasan_quarantine_reduce+0x18e/0x1d0 mm/kasan/quarantine.c:294
__kasan_slab_alloc+0x65/0x90 mm/kasan/common.c:305
kasan_slab_alloc include/linux/kasan.h:188 [inline]
slab_post_alloc_hook mm/slab.h:763 [inline]
slab_alloc_node mm/slub.c:3478 [inline]
slab_alloc mm/slub.c:3486 [inline]
__kmem_cache_alloc_lru mm/slub.c:3493 [inline]
kmem_cache_alloc+0x15d/0x380 mm/slub.c:3502
vm_area_dup+0x21/0x2f0 kernel/fork.c:500
__split_vma+0x17d/0x1070 mm/mmap.c:2365
split_vma mm/mmap.c:2437 [inline]
vma_modify+0x25d/0x450 mm/mmap.c:2472
vma_modify_flags include/linux/mm.h:3271 [inline]
mprotect_fixup+0x228/0xc80 mm/mprotect.c:635
do_mprotect_pkey+0x852/0xd60 mm/mprotect.c:809
__do_sys_mprotect mm/mprotect.c:830 [inline]
__se_sys_mprotect mm/mprotect.c:827 [inline]
__x64_sys_mprotect+0x78/0xb0 mm/mprotect.c:827
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0x6b

Memory state around the buggy address:
ffff88801f3b9b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
ffff88801f3b9c00: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc
>ffff88801f3b9c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
^
ffff88801f3b9d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc
ffff88801f3b9d80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb

Fixes: 876c14ad014d ("af_unix: fix holding spinlock in oob handling")
Reported-and-tested-by: syzbot+7a2d546fa43e49315ed3@syzkaller.appspotmail.com
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Rao Shoaib <rao.shoaib@oracle.com>
Reviewed-by: Rao shoaib <rao.shoaib@oracle.com>
Link: https://lore.kernel.org/r/20231113134938.168151-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2023-11-14 10:51:13 +01:00
Juergen Gross
3bdb0ac350 xen/events: remove some simple helpers from events_base.c
The helper functions type_from_irq() and cpu_from_irq() are just one
line functions used only internally.

Open code them where needed. At the same time modify and rename
get_evtchn_to_irq() to return a struct irq_info instead of the IRQ
number.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-14 10:16:34 +01:00
Juergen Gross
686464514f xen/events: reduce externally visible helper functions
get_evtchn_to_irq() has only one external user while irq_from_evtchn()
provides the same functionality and is exported for a wider user base.
Modify the only external user of get_evtchn_to_irq() to use
irq_from_evtchn() instead and make get_evtchn_to_irq() static.

evtchn_from_irq() and irq_from_virq() have a single external user and
can easily be combined to a new helper irq_evtchn_from_virq() allowing
to drop irq_from_virq() and to make evtchn_from_irq() static.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-14 09:29:28 +01:00
Jakub Kicinski
48c205c69a Merge branch 'r8169-fix-dash-devices-network-lost-issue'
ChunHao Lin says:

====================
r8169: fix DASH devices network lost issue

This series are used to fix network lost issue on systems that support
DASH. It has been tested on rtl8168ep and rtl8168fp.
====================

Link: https://lore.kernel.org/r/20231109173400.4573-1-hau@realtek.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 21:00:11 -08:00
ChunHao Lin
868c3b95af r8169: fix network lost after resume on DASH systems
Device that support DASH may be reseted or powered off during suspend.
So driver needs to handle DASH during system suspend and resume. Or
DASH firmware will influence device behavior and causes network lost.

Fixes: b646d90053f8 ("r8169: magic.")
Cc: stable@vger.kernel.org
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: ChunHao Lin <hau@realtek.com>
Link: https://lore.kernel.org/r/20231109173400.4573-3-hau@realtek.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 21:00:07 -08:00
ChunHao Lin
0ab0c45d8a r8169: add handling DASH when DASH is disabled
For devices that support DASH, even DASH is disabled, there may still
exist a default firmware that will influence device behavior.
So driver needs to handle DASH for devices that support DASH, no
matter the DASH status is.

This patch also prepares for "fix network lost after resume on DASH
systems".

Fixes: ee7a1beb9759 ("r8169:call "rtl8168_driver_start" "rtl8168_driver_stop" only when hardware dash function is enabled")
Cc: stable@vger.kernel.org
Signed-off-by: ChunHao Lin <hau@realtek.com>
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://lore.kernel.org/r/20231109173400.4573-2-hau@realtek.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 21:00:07 -08:00
Jakub Kicinski
334e90b8d7 Merge branch 'fix-large-frames-in-the-gemini-ethernet-driver'
Linus Walleij says:

====================
Fix large frames in the Gemini ethernet driver

This is the result of a bug hunt for a problem with the
RTL8366RB DSA switch leading me wrong all over the place.

I am indebted to Vladimir Oltean who as usual pointed
out where the real problem was, many thanks!

Tryig to actually use big ("jumbo") frames on this
hardware uncovered the real bugs. Then I tested it on
the DSA switch and it indeed fixes the issue.

To make sure it also works fine with big frames on
non-DSA devices I also copied a large video file over
scp to a device with maximum frame size, the data
was transported in large TCP packets ending up in
0x7ff sized frames using software checksumming at
~2.0 MB/s.

If I set down the MTU to the standard 1500 bytes so
that hardware checksumming is used, the scp transfer
of the same file was slightly lower, ~1.8-1.9 MB/s.

Despite this not being the best test it shows that
we can now stress the hardware with large frames
and that software checksum works fine.

v3: https://lore.kernel.org/r/20231107-gemini-largeframe-fix-v3-0-e3803c080b75@linaro.org
v2: https://lore.kernel.org/r/20231105-gemini-largeframe-fix-v2-0-cd3a5aa6c496@linaro.org
v1: https://lore.kernel.org/r/20231104-gemini-largeframe-fix-v1-0-9c5513f22f33@linaro.org
====================

Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-0-6e611528db08@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 20:58:22 -08:00
Linus Walleij
dc6c0bfbaa net: ethernet: cortina: Fix MTU max setting
The RX max frame size is over 10000 for the Gemini ethernet,
but the TX max frame size is actually just 2047 (0x7ff after
checking the datasheet). Reflect this in what we offer to Linux,
cap the MTU at the TX max frame minus ethernet headers.

We delete the code disabling the hardware checksum for large
MTUs as netdev->mtu can no longer be larger than
netdev->max_mtu meaning the if()-clause in gmac_fix_features()
is never true.

Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet")
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-3-6e611528db08@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 20:58:20 -08:00
Linus Walleij
d4d0c5b4d2 net: ethernet: cortina: Handle large frames
The Gemini ethernet controller provides hardware checksumming
for frames up to 1514 bytes including ethernet headers but not
FCS.

If we start sending bigger frames (after first bumping up the MTU
on both interfaces sending and receiving the frames), truncated
packets start to appear on the target such as in this tcpdump
resulting from ping -s 1474:

23:34:17.241983 14:d6:4d:a8:3c:4f (oui Unknown) > bc:ae:c5:6b:a8:3d (oui Unknown),
ethertype IPv4 (0x0800), length 1514: truncated-ip - 2 bytes missing!
(tos 0x0, ttl 64, id 32653, offset 0, flags [DF], proto ICMP (1), length 1502)
OpenWrt.lan > Fecusia: ICMP echo request, id 1672, seq 50, length 1482

If we bypass the hardware checksumming and provide a software
fallback, everything starts working fine up to the max TX MTU
of 2047 bytes, for example ping -s2000 192.168.1.2:

00:44:29.587598 bc:ae:c5:6b:a8:3d (oui Unknown) > 14:d6:4d:a8:3c:4f (oui Unknown),
ethertype IPv4 (0x0800), length 2042:
(tos 0x0, ttl 64, id 51828, offset 0, flags [none], proto ICMP (1), length 2028)
Fecusia > OpenWrt.lan: ICMP echo reply, id 1683, seq 4, length 2008

The bit enabling to bypass hardware checksum (or any of the
"TSS" bits) are undocumented in the hardware reference manual.
The entire hardware checksum unit appears undocumented. The
conclusion that we need to use the "bypass" bit was found by
trial-and-error.

Since no hardware checksum will happen, we slot in a software
checksum fallback.

Check for the condition where we need to compute checksum on the
skb with either hardware or software using == CHECKSUM_PARTIAL instead
of != CHECKSUM_NONE which is an incomplete check according to
<linux/skbuff.h>.

On the D-Link DIR-685 router this fixes a bug on the conduit
interface to the RTL8366RB DSA switch: as the switch needs to add
space for its tag it increases the MTU on the conduit interface
to 1504 and that means that when the router sends packages
of 1500 bytes these get an extra 4 bytes of DSA tag and the
transfer fails because of the erroneous hardware checksumming,
affecting such basic functionality as the LuCI web interface.

Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-2-6e611528db08@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 20:58:20 -08:00
Linus Walleij
510e35fb93 net: ethernet: cortina: Fix max RX frame define
Enumerator 3 is 1548 bytes according to the datasheet.
Not 1542.

Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet")
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-1-6e611528db08@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 20:58:19 -08:00
Eric Dumazet
3cffa2ddc4 bonding: stop the device in bond_setup_by_slave()
Commit 9eed321cde22 ("net: lapbether: only support ethernet devices")
has been able to keep syzbot away from net/lapb, until today.

In the following splat [1], the issue is that a lapbether device has
been created on a bonding device without members. Then adding a non
ARPHRD_ETHER member forced the bonding master to change its type.

The fix is to make sure we call dev_close() in bond_setup_by_slave()
so that the potential linked lapbether devices (or any other devices
having assumptions on the physical device) are removed.

A similar bug has been addressed in commit 40baec225765
("bonding: fix panic on non-ARPHRD_ETHER enslave failure")

[1]
skbuff: skb_under_panic: text:ffff800089508810 len:44 put:40 head:ffff0000c78e7c00 data:ffff0000c78e7bea tail:0x16 end:0x140 dev:bond0
kernel BUG at net/core/skbuff.c:192 !
Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP
Modules linked in:
CPU: 0 PID: 6007 Comm: syz-executor383 Not tainted 6.6.0-rc3-syzkaller-gbf6547d8715b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023
pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : skb_panic net/core/skbuff.c:188 [inline]
pc : skb_under_panic+0x13c/0x140 net/core/skbuff.c:202
lr : skb_panic net/core/skbuff.c:188 [inline]
lr : skb_under_panic+0x13c/0x140 net/core/skbuff.c:202
sp : ffff800096a06aa0
x29: ffff800096a06ab0 x28: ffff800096a06ba0 x27: dfff800000000000
x26: ffff0000ce9b9b50 x25: 0000000000000016 x24: ffff0000c78e7bea
x23: ffff0000c78e7c00 x22: 000000000000002c x21: 0000000000000140
x20: 0000000000000028 x19: ffff800089508810 x18: ffff800096a06100
x17: 0000000000000000 x16: ffff80008a629a3c x15: 0000000000000001
x14: 1fffe00036837a32 x13: 0000000000000000 x12: 0000000000000000
x11: 0000000000000201 x10: 0000000000000000 x9 : cb50b496c519aa00
x8 : cb50b496c519aa00 x7 : 0000000000000001 x6 : 0000000000000001
x5 : ffff800096a063b8 x4 : ffff80008e280f80 x3 : ffff8000805ad11c
x2 : 0000000000000001 x1 : 0000000100000201 x0 : 0000000000000086
Call trace:
skb_panic net/core/skbuff.c:188 [inline]
skb_under_panic+0x13c/0x140 net/core/skbuff.c:202
skb_push+0xf0/0x108 net/core/skbuff.c:2446
ip6gre_header+0xbc/0x738 net/ipv6/ip6_gre.c:1384
dev_hard_header include/linux/netdevice.h:3136 [inline]
lapbeth_data_transmit+0x1c4/0x298 drivers/net/wan/lapbether.c:257
lapb_data_transmit+0x8c/0xb0 net/lapb/lapb_iface.c:447
lapb_transmit_buffer+0x178/0x204 net/lapb/lapb_out.c:149
lapb_send_control+0x220/0x320 net/lapb/lapb_subr.c:251
__lapb_disconnect_request+0x9c/0x17c net/lapb/lapb_iface.c:326
lapb_device_event+0x288/0x4e0 net/lapb/lapb_iface.c:492
notifier_call_chain+0x1a4/0x510 kernel/notifier.c:93
raw_notifier_call_chain+0x3c/0x50 kernel/notifier.c:461
call_netdevice_notifiers_info net/core/dev.c:1970 [inline]
call_netdevice_notifiers_extack net/core/dev.c:2008 [inline]
call_netdevice_notifiers net/core/dev.c:2022 [inline]
__dev_close_many+0x1b8/0x3c4 net/core/dev.c:1508
dev_close_many+0x1e0/0x470 net/core/dev.c:1559
dev_close+0x174/0x250 net/core/dev.c:1585
lapbeth_device_event+0x2e4/0x958 drivers/net/wan/lapbether.c:466
notifier_call_chain+0x1a4/0x510 kernel/notifier.c:93
raw_notifier_call_chain+0x3c/0x50 kernel/notifier.c:461
call_netdevice_notifiers_info net/core/dev.c:1970 [inline]
call_netdevice_notifiers_extack net/core/dev.c:2008 [inline]
call_netdevice_notifiers net/core/dev.c:2022 [inline]
__dev_close_many+0x1b8/0x3c4 net/core/dev.c:1508
dev_close_many+0x1e0/0x470 net/core/dev.c:1559
dev_close+0x174/0x250 net/core/dev.c:1585
bond_enslave+0x2298/0x30cc drivers/net/bonding/bond_main.c:2332
bond_do_ioctl+0x268/0xc64 drivers/net/bonding/bond_main.c:4539
dev_ifsioc+0x754/0x9ac
dev_ioctl+0x4d8/0xd34 net/core/dev_ioctl.c:786
sock_do_ioctl+0x1d4/0x2d0 net/socket.c:1217
sock_ioctl+0x4e8/0x834 net/socket.c:1322
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:871 [inline]
__se_sys_ioctl fs/ioctl.c:857 [inline]
__arm64_sys_ioctl+0x14c/0x1c8 fs/ioctl.c:857
__invoke_syscall arch/arm64/kernel/syscall.c:37 [inline]
invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51
el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136
do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155
el0_svc+0x58/0x16c arch/arm64/kernel/entry-common.c:678
el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696
el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591
Code: aa1803e6 aa1903e7 a90023f5 94785b8b (d4210000)

Fixes: 872254dd6b1f ("net/bonding: Enable bonding to enslave non ARPHRD_ETHER")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20231109180102.4085183-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 20:55:08 -08:00
Eric Dumazet
73bde5a329 ptp: annotate data-race around q->head and q->tail
As I was working on a syzbot report, I found that KCSAN would
probably complain that reading q->head or q->tail without
barriers could lead to invalid results.

Add corresponding READ_ONCE() and WRITE_ONCE() to avoid
load-store tearing.

Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20231109174859.3995880-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 20:51:37 -08:00
Jakub Kicinski
4b3812d90b Revert "ptp: Fixes a null pointer dereference in ptp_ioctl"
This reverts commit 8a4f030dbced6fc255cbe67b2d0a129947e18493.

Richard says:

  The test itself is harmless, but keeping it will make people think,
  "oh this pointer can be invalid."

  In fact the core stack ensures that ioctl() can't be invoked after
  release(), otherwise Bad Stuff happens.

Fixes: 8a4f030dbced ("ptp: Fixes a null pointer dereference in ptp_ioctl")
Link: https://lore.kernel.org/all/ZVAf_qdRfDAQYUt-@hoboy.vegasvil.org/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-11-13 20:50:12 -08:00
Dan Nowlin
a778616e4c ice: fix DDP package download for packages without signature segment
Commit 3cbdb0343022 ("ice: Add support for E830 DDP package segment")
incorrectly removed support for package download for packages without a
signature segment. These packages include the signature buffer inline
in the configurations buffers, and not in a signature segment.

Fix package download by providing download support for both packages
with (ice_download_pkg_with_sig_seg()) and without signature segment
(ice_download_pkg_without_sig_seg()).

Fixes: 3cbdb0343022 ("ice: Add support for E830 DDP package segment")
Reported-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Closes: https://lore.kernel.org/netdev/ZUT50a94kk2pMGKb@boxer/
Tested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Dan Nowlin <dan.nowlin@intel.com>
Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Arpana Arland <arpanax.arland@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2023-11-13 10:56:58 -08:00
Arkadiusz Kubalewski
6db5f2cd9e ice: dpll: fix output pin capabilities
The dpll output pins which are used to feed clock signal of PHY and MAC
circuits cannot be disconnected, those integrated circuits require clock
signal for operation.
By stopping assignment of DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE pin
capability, prevent the user from invoking the state set callback on
those pins, setting the state on those pins already returns error, as
firmware doesn't allow the change of their state.

Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu")
Fixes: 8a3a565ff210 ("ice: add admin commands to access cgu configuration")
Reviewed-by: Andrii Staikov <andrii.staikov@intel.com>
Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Tested-by: Sunitha Mekala <sunithax.d.mekala@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2023-11-13 10:56:38 -08:00
Arkadiusz Kubalewski
4a4027f25d ice: dpll: fix check for dpll input priority range
Supported priority value for input pins may differ with regard of NIC
firmware version. E810T NICs with 3.20/4.00 FW versions would accept
priority range 0-31, where firmware 4.10+ would support the range 0-9
and extra value of 255.
Remove the in-range check as the driver has no information on supported
values from the running firmware, let firmware decide if given value is
correct and return extack error if the value is not supported.

Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu")
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Tested-by: Sunitha Mekala <sunithax.d.mekala@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2023-11-13 10:02:15 -08:00
Arkadiusz Kubalewski
7a1aba89ac ice: dpll: fix initial lock status of dpll
When dpll device is registered and dpll subsystem performs notify of a
new device, the lock state value provided to dpll subsystem equals 0
which is invalid value for the `enum dpll_lock_status`.
Provide correct value by obtaining it from firmware before registering
the dpll device.

Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu")
Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Tested-by: Sunitha Mekala <sunithax.d.mekala@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2023-11-13 09:48:09 -08:00
Juergen Gross
f96c6c588c xen/events: remove unused functions
There are no users of xen_irq_from_pirq() and xen_set_irq_pending().

Remove those functions.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-13 15:45:20 +01:00
Juergen Gross
47d9702040 xen/events: fix delayed eoi list handling
When delaying eoi handling of events, the related elements are queued
into the percpu lateeoi list. In case the list isn't empty, the
elements should be sorted by the time when eoi handling is to happen.

Unfortunately a new element will never be queued at the start of the
list, even if it has a handling time lower than all other list
elements.

Fix that by handling that case the same way as for an empty list.

Fixes: e99502f76271 ("xen/events: defer eoi in case of excessive number of events")
Reported-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-13 15:21:08 +01:00
Willem de Bruijn
c0a2a1b0d6 ppp: limit MRU to 64K
ppp_sync_ioctl allows setting device MRU, but does not sanity check
this input.

Limit to a sane upper bound of 64KB.

No implementation I could find generates larger than 64KB frames.
RFC 2823 mentions an upper bound of PPP over SDL of 64KB based on the
16-bit length field. Other protocols will be smaller, such as PPPoE
(9KB jumbo frame) and PPPoA (18190 maximum CPCS-SDU size, RFC 2364).
PPTP and L2TP encapsulate in IP.

Syzbot managed to trigger alloc warning in __alloc_pages:

	if (WARN_ON_ONCE_GFP(order > MAX_ORDER, gfp))

    WARNING: CPU: 1 PID: 37 at mm/page_alloc.c:4544 __alloc_pages+0x3ab/0x4a0 mm/page_alloc.c:4544

    __alloc_skb+0x12b/0x330 net/core/skbuff.c:651
    __netdev_alloc_skb+0x72/0x3f0 net/core/skbuff.c:715
    netdev_alloc_skb include/linux/skbuff.h:3225 [inline]
    dev_alloc_skb include/linux/skbuff.h:3238 [inline]
    ppp_sync_input drivers/net/ppp/ppp_synctty.c:669 [inline]
    ppp_sync_receive+0xff/0x680 drivers/net/ppp/ppp_synctty.c:334
    tty_ldisc_receive_buf+0x14c/0x180 drivers/tty/tty_buffer.c:390
    tty_port_default_receive_buf+0x70/0xb0 drivers/tty/tty_port.c:37
    receive_buf drivers/tty/tty_buffer.c:444 [inline]
    flush_to_ldisc+0x261/0x780 drivers/tty/tty_buffer.c:494
    process_one_work+0x884/0x15c0 kernel/workqueue.c:2630

With call

    ioctl$PPPIOCSMRU1(r1, 0x40047452, &(0x7f0000000100)=0x5e6417a8)

Similar code exists in other drivers that implement ppp_channel_ops
ioctl PPPIOCSMRU. Those might also be in scope. Notably excluded from
this are pppol2tp_ioctl and pppoe_ioctl.

This code goes back to the start of git history.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+6177e1f90d92583bcc58@syzkaller.appspotmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 11:09:20 +00:00
Sven Auhagen
ca8add922f net: mvneta: fix calls to page_pool_get_stats
Calling page_pool_get_stats in the mvneta driver without checks
leads to kernel crashes.
First the page pool is only available if the bm is not used.
The page pool is also not allocated when the port is stopped.
It can also be not allocated in case of errors.

The current implementation leads to the following crash calling
ethstats on a port that is down or when calling it at the wrong moment:

ble to handle kernel NULL pointer dereference at virtual address 00000070
[00000070] *pgd=00000000
Internal error: Oops: 5 [#1] SMP ARM
Hardware name: Marvell Armada 380/385 (Device Tree)
PC is at page_pool_get_stats+0x18/0x1cc
LR is at mvneta_ethtool_get_stats+0xa0/0xe0 [mvneta]
pc : [<c0b413cc>]    lr : [<bf0a98d8>]    psr: a0000013
sp : f1439d48  ip : f1439dc0  fp : 0000001d
r10: 00000100  r9 : c4816b80  r8 : f0d75150
r7 : bf0b400c  r6 : c238f000  r5 : 00000000  r4 : f1439d68
r3 : c2091040  r2 : ffffffd8  r1 : f1439d68  r0 : 00000000
Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
Control: 10c5387d  Table: 066b004a  DAC: 00000051
Register r0 information: NULL pointer
Register r1 information: 2-page vmalloc region starting at 0xf1438000 allocated at kernel_clone+0x9c/0x390
Register r2 information: non-paged memory
Register r3 information: slab kmalloc-2k start c2091000 pointer offset 64 size 2048
Register r4 information: 2-page vmalloc region starting at 0xf1438000 allocated at kernel_clone+0x9c/0x390
Register r5 information: NULL pointer
Register r6 information: slab kmalloc-cg-4k start c238f000 pointer offset 0 size 4096
Register r7 information: 15-page vmalloc region starting at 0xbf0a8000 allocated at load_module+0xa30/0x219c
Register r8 information: 1-page vmalloc region starting at 0xf0d75000 allocated at ethtool_get_stats+0x138/0x208
Register r9 information: slab task_struct start c4816b80 pointer offset 0
Register r10 information: non-paged memory
Register r11 information: non-paged memory
Register r12 information: 2-page vmalloc region starting at 0xf1438000 allocated at kernel_clone+0x9c/0x390
Process snmpd (pid: 733, stack limit = 0x38de3a88)
Stack: (0xf1439d48 to 0xf143a000)
9d40:                   000000c0 00000001 c238f000 bf0b400c f0d75150 c4816b80
9d60: 00000100 bf0a98d8 00000000 00000000 00000000 00000000 00000000 00000000
9d80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
9da0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
9dc0: 00000dc0 5335509c 00000035 c238f000 bf0b2214 01067f50 f0d75000 c0b9b9c8
9de0: 0000001d 00000035 c2212094 5335509c c4816b80 c238f000 c5ad6e00 01067f50
9e00: c1b0be80 c4816b80 00014813 c0b9d7f0 00000000 00000000 0000001d 0000001d
9e20: 00000000 00001200 00000000 00000000 c216ed90 c73943b8 00000000 00000000
9e40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
9e60: 00000000 c0ad9034 00000000 00000000 00000000 00000000 00000000 00000000
9e80: 00000000 00000000 00000000 5335509c c1b0be80 f1439ee4 00008946 c1b0be80
9ea0: 01067f50 f1439ee3 00000000 00000046 b6d77ae0 c0b383f0 00008946 becc83e8
9ec0: c1b0be80 00000051 0000000b c68ca480 c7172d00 c0ad8ff0 f1439ee3 cf600e40
9ee0: 01600e40 32687465 00000000 00000000 00000000 01067f50 00000000 00000000
9f00: 00000000 5335509c 00008946 00008946 00000000 c68ca480 becc83e8 c05e2de0
9f20: f1439fb0 c03002f0 00000006 5ac3c35a c4816b80 00000006 b6d77ae0 c030caf0
9f40: c4817350 00000014 f1439e1c 0000000c 00000000 00000051 01000000 00000014
9f60: 00003fec f1439edc 00000001 c0372abc b6d77ae0 c0372abc cf600e40 5335509c
9f80: c21e6800 01015c9c 0000000b 00008946 00000036 c03002f0 c4816b80 00000036
9fa0: b6d77ae0 c03000c0 01015c9c 0000000b 0000000b 00008946 becc83e8 00000000
9fc0: 01015c9c 0000000b 00008946 00000036 00000035 010678a0 b6d797ec b6d77ae0
9fe0: b6dbf738 becc838c b6d186d7 b6baa858 40000030 0000000b 00000000 00000000
 page_pool_get_stats from mvneta_ethtool_get_stats+0xa0/0xe0 [mvneta]
 mvneta_ethtool_get_stats [mvneta] from ethtool_get_stats+0x154/0x208
 ethtool_get_stats from dev_ethtool+0xf48/0x2480
 dev_ethtool from dev_ioctl+0x538/0x63c
 dev_ioctl from sock_ioctl+0x49c/0x53c
 sock_ioctl from sys_ioctl+0x134/0xbd8
 sys_ioctl from ret_fast_syscall+0x0/0x1c
Exception stack(0xf1439fa8 to 0xf1439ff0)
9fa0:                   01015c9c 0000000b 0000000b 00008946 becc83e8 00000000
9fc0: 01015c9c 0000000b 00008946 00000036 00000035 010678a0 b6d797ec b6d77ae0
9fe0: b6dbf738 becc838c b6d186d7 b6baa858
Code: e28dd004 e1a05000 e2514000 0a00006a (e5902070)

This commit adds the proper checks before calling page_pool_get_stats.

Fixes: b3fc79225f05 ("net: mvneta: add support for page_pool_get_stats")
Signed-off-by: Sven Auhagen <sven.auhagen@voleatech.de>
Reported-by: Paulo Da Silva <Paulo.DaSilva@kyberna.com>
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 11:06:59 +00:00
Shigeru Yoshida
fb317eb23b tipc: Fix kernel-infoleak due to uninitialized TLV value
KMSAN reported the following kernel-infoleak issue:

=====================================================
BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline]
BUG: KMSAN: kernel-infoleak in copy_to_user_iter lib/iov_iter.c:24 [inline]
BUG: KMSAN: kernel-infoleak in iterate_ubuf include/linux/iov_iter.h:29 [inline]
BUG: KMSAN: kernel-infoleak in iterate_and_advance2 include/linux/iov_iter.h:245 [inline]
BUG: KMSAN: kernel-infoleak in iterate_and_advance include/linux/iov_iter.h:271 [inline]
BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x4ec/0x2bc0 lib/iov_iter.c:186
 instrument_copy_to_user include/linux/instrumented.h:114 [inline]
 copy_to_user_iter lib/iov_iter.c:24 [inline]
 iterate_ubuf include/linux/iov_iter.h:29 [inline]
 iterate_and_advance2 include/linux/iov_iter.h:245 [inline]
 iterate_and_advance include/linux/iov_iter.h:271 [inline]
 _copy_to_iter+0x4ec/0x2bc0 lib/iov_iter.c:186
 copy_to_iter include/linux/uio.h:197 [inline]
 simple_copy_to_iter net/core/datagram.c:532 [inline]
 __skb_datagram_iter.5+0x148/0xe30 net/core/datagram.c:420
 skb_copy_datagram_iter+0x52/0x210 net/core/datagram.c:546
 skb_copy_datagram_msg include/linux/skbuff.h:3960 [inline]
 netlink_recvmsg+0x43d/0x1630 net/netlink/af_netlink.c:1967
 sock_recvmsg_nosec net/socket.c:1044 [inline]
 sock_recvmsg net/socket.c:1066 [inline]
 __sys_recvfrom+0x476/0x860 net/socket.c:2246
 __do_sys_recvfrom net/socket.c:2264 [inline]
 __se_sys_recvfrom net/socket.c:2260 [inline]
 __x64_sys_recvfrom+0x130/0x200 net/socket.c:2260
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0x6b

Uninit was created at:
 slab_post_alloc_hook+0x103/0x9e0 mm/slab.h:768
 slab_alloc_node mm/slub.c:3478 [inline]
 kmem_cache_alloc_node+0x5f7/0xb50 mm/slub.c:3523
 kmalloc_reserve+0x13c/0x4a0 net/core/skbuff.c:560
 __alloc_skb+0x2fd/0x770 net/core/skbuff.c:651
 alloc_skb include/linux/skbuff.h:1286 [inline]
 tipc_tlv_alloc net/tipc/netlink_compat.c:156 [inline]
 tipc_get_err_tlv+0x90/0x5d0 net/tipc/netlink_compat.c:170
 tipc_nl_compat_recv+0x1042/0x15d0 net/tipc/netlink_compat.c:1324
 genl_family_rcv_msg_doit net/netlink/genetlink.c:972 [inline]
 genl_family_rcv_msg net/netlink/genetlink.c:1052 [inline]
 genl_rcv_msg+0x1220/0x12c0 net/netlink/genetlink.c:1067
 netlink_rcv_skb+0x4a4/0x6a0 net/netlink/af_netlink.c:2545
 genl_rcv+0x41/0x60 net/netlink/genetlink.c:1076
 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline]
 netlink_unicast+0xf4b/0x1230 net/netlink/af_netlink.c:1368
 netlink_sendmsg+0x1242/0x1420 net/netlink/af_netlink.c:1910
 sock_sendmsg_nosec net/socket.c:730 [inline]
 __sock_sendmsg net/socket.c:745 [inline]
 ____sys_sendmsg+0x997/0xd60 net/socket.c:2588
 ___sys_sendmsg+0x271/0x3b0 net/socket.c:2642
 __sys_sendmsg net/socket.c:2671 [inline]
 __do_sys_sendmsg net/socket.c:2680 [inline]
 __se_sys_sendmsg net/socket.c:2678 [inline]
 __x64_sys_sendmsg+0x2fa/0x4a0 net/socket.c:2678
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0x6b

Bytes 34-35 of 36 are uninitialized
Memory access of size 36 starts at ffff88802d464a00
Data copied to user address 00007ff55033c0a0

CPU: 0 PID: 30322 Comm: syz-executor.0 Not tainted 6.6.0-14500-g1c41041124bd #10
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014
=====================================================

tipc_add_tlv() puts TLV descriptor and value onto `skb`. This size is
calculated with TLV_SPACE() macro. It adds the size of struct tlv_desc and
the length of TLV value passed as an argument, and aligns the result to a
multiple of TLV_ALIGNTO, i.e., a multiple of 4 bytes.

If the size of struct tlv_desc plus the length of TLV value is not aligned,
the current implementation leaves the remaining bytes uninitialized. This
is the cause of the above kernel-infoleak issue.

This patch resolves this issue by clearing data up to an aligned size.

Fixes: d0796d1ef63d ("tipc: convert legacy nl bearer dump to nl compat")
Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 11:05:47 +00:00
Willem de Bruijn
e6daf129cc net: gso_test: support CONFIG_MAX_SKB_FRAGS up to 45
The test allocs a single page to hold all the frag_list skbs. This
is insufficient on kernels with CONFIG_MAX_SKB_FRAGS=45, due to the
increased skb_shared_info frags[] array length.

        gso_test_func: ASSERTION FAILED at net/core/gso_test.c:210
        Expected alloc_size <= ((1UL) << 12), but
            alloc_size == 5075 (0x13d3)
            ((1UL) << 12) == 4096 (0x1000)

Simplify the logic. Just allocate a page for each frag_list skb.

Fixes: 4688ecb1385f ("net: expand skb_segment unit test with frag_list coverage")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 11:04:38 +00:00
Marek Behún
438cbcdf10 net: mdio: fix typo in header
The quotes symbol in
  "EEE "link partner ability 1
should be at the end of the register name
  "EEE link partner ability 1"

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 11:02:30 +00:00
MD Danish Anwar
6979a51eca MAINTAINERS: add entry for TI ICSSG Ethernet driver
Add record for TI Industrial Communication Subsystem - Gigabit (ICSSG)
Ethernet driver.

Also add Roger and myself as maintainer.

Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 11:00:52 +00:00
David S. Miller
5d64075cd8 Merge branch 'hns3-fixes'
Jijie Shao says:

====================
There are some bugfix for the HNS3 ethernet driver

There are some bugfix for the HNS3 ethernet driver

---
ChangeLog:
v1 -> v2:
  - net: hns3: fix add VLAN fail issue, net: hns3: fix VF reset fail issue
    are modified suggested by Paolo
  v1: https://lore.kernel.org/all/20231028025917.314305-1-shaojijie@huawei.com/
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 09:06:58 +00:00
Jijie Shao
dff655e82f net: hns3: fix VF wrong speed and duplex issue
If PF is down, firmware will returns 10 Mbit/s rate and half-duplex mode
when PF queries the port information from firmware.

After imp reset command is executed, PF status changes to down,
and PF will query link status and updates port information
from firmware in a periodic scheduled task.

However, there is a low probability that port information is updated
when PF is down, and then PF link status changes to up.
In this case, PF synchronizes incorrect rate and duplex mode to VF.

This patch fixes it by updating port information before
PF synchronizes the rate and duplex to the VF
when PF changes to up.

Fixes: 18b6e31f8bf4 ("net: hns3: PF add support for pushing link status to VFs")
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 09:06:58 +00:00
Jijie Shao
65e98bb56f net: hns3: fix VF reset fail issue
Currently the reset process in hns3 and firmware watchdog init process is
asynchronous. We think firmware watchdog initialization is completed
before VF clear the interrupt source. However, firmware initialization
may not complete early. So VF will receive multiple reset interrupts
and fail to reset.

So we add delay before VF interrupt source and 5 ms delay
is enough to avoid second reset interrupt.

Fixes: 427900d27d86 ("net: hns3: fix the timing issue of VF clearing interrupt sources")
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 09:06:58 +00:00
Yonglong Liu
dbd2f3b20c net: hns3: fix variable may not initialized problem in hns3_init_mac_addr()
When a VF is calling hns3_init_mac_addr(), get_mac_addr() may
return fail, then the value of mac_addr_temp is not initialized.

Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC")
Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 09:06:58 +00:00
Yonglong Liu
53aba458f2 net: hns3: fix out-of-bounds access may occur when coalesce info is read via debugfs
The hns3 driver define an array of string to show the coalesce
info, but if the kernel adds a new mode or a new state,
out-of-bounds access may occur when coalesce info is read via
debugfs, this patch fix the problem.

Fixes: c99fead7cb07 ("net: hns3: add debugfs support for interrupt coalesce")
Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 09:06:58 +00:00
Jian Shen
75b247b57d net: hns3: fix incorrect capability bit display for copper port
Currently, the FEC capability bit is default set for device version V2.
It's incorrect for the copper port. Eventhough it doesn't make the nic
work abnormal, but the capability information display in debugfs may
confuse user. So clear it when driver get the port type inforamtion.

Fixes: 433ccce83504 ("net: hns3: use FEC capability queried from firmware")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 09:06:58 +00:00
Yonglong Liu
ac92c0a9a0 net: hns3: add barrier in vf mailbox reply process
In hclgevf_mbx_handler() and hclgevf_get_mbx_resp() functions,
there is a typical store-store and load-load scenario between
received_resp and additional_info. This patch adds barrier
to fix the problem.

Fixes: 4671042f1ef0 ("net: hns3: add match_id to check mailbox response from PF to VF")
Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 09:06:58 +00:00
Jian Shen
472a2ff63e net: hns3: fix add VLAN fail issue
The hclge_sync_vlan_filter is called in periodic task,
trying to remove VLAN from vlan_del_fail_bmap. It can
be concurrence with VLAN adding operation from user.
So once user failed to delete a VLAN id, and add it
again soon, it may be removed by the periodic task,
which may cause the software configuration being
inconsistent with hardware. So add mutex handling
to avoid this.

     user                        hns3 driver

                                           periodic task
                                                │
  add vlan 10 ───── hns3_vlan_rx_add_vid        │
       │             (suppose success)          │
       │                                        │
  del vlan 10 ─────  hns3_vlan_rx_kill_vid      │
       │           (suppose fail,add to         │
       │             vlan_del_fail_bmap)        │
       │                                        │
  add vlan 10 ───── hns3_vlan_rx_add_vid        │
                     (suppose success)          │
                                       foreach vlan_del_fail_bmp
                                            del vlan 10

Fixes: fe4144d47eef ("net: hns3: sync VLAN filter entries when kill VLAN ID failed")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-13 09:06:58 +00:00
Randy Dunlap
50e865a568 xen/shbuf: eliminate 17 kernel-doc warnings
Don't use kernel-doc markers ("/**") for comments that are not in
kernel-doc format. This prevents multiple kernel-doc warnings:

xen-front-pgdir-shbuf.c:25: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * This structure represents the structure of a shared page
xen-front-pgdir-shbuf.c:37: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Shared buffer ops which are differently implemented
xen-front-pgdir-shbuf.c:65: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Get granted reference to the very first page of the
xen-front-pgdir-shbuf.c:85: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Map granted references of the shared buffer.
xen-front-pgdir-shbuf.c:106: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Unmap granted references of the shared buffer.
xen-front-pgdir-shbuf.c:127: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Free all the resources of the shared buffer.
xen-front-pgdir-shbuf.c:154: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Get the number of pages the page directory consumes itself.
xen-front-pgdir-shbuf.c:164: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Calculate the number of grant references needed to share the buffer
xen-front-pgdir-shbuf.c:176: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Calculate the number of grant references needed to share the buffer
xen-front-pgdir-shbuf.c:194: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Unmap the buffer previously mapped with grant references
xen-front-pgdir-shbuf.c:242: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Map the buffer with grant references provided by the backend.
xen-front-pgdir-shbuf.c:324: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Fill page directory with grant references to the pages of the
xen-front-pgdir-shbuf.c:354: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Fill page directory with grant references to the pages of the
xen-front-pgdir-shbuf.c:393: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Grant references to the frontend's buffer pages.
xen-front-pgdir-shbuf.c:422: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Grant all the references needed to share the buffer.
xen-front-pgdir-shbuf.c:470: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Allocate all required structures to mange shared buffer.
xen-front-pgdir-shbuf.c:510: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Allocate a new instance of a shared buffer.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Closes: lore.kernel.org/r/202311060203.yQrpPZhm-lkp@intel.com
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Cc: xen-devel@lists.xenproject.org
Link: https://lore.kernel.org/r/20231106055631.21520-1-rdunlap@infradead.org
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-13 08:14:42 +01:00
Roger Pau Monne
bfa993b355 acpi/processor: sanitize _OSC/_PDC capabilities for Xen dom0
The Processor capability bits notify ACPI of the OS capabilities, and
so ACPI can adjust the return of other Processor methods taking the OS
capabilities into account.

When Linux is running as a Xen dom0, the hypervisor is the entity
in charge of processor power management, and hence Xen needs to make
sure the capabilities reported by _OSC/_PDC match the capabilities of
the driver in Xen.

Introduce a small helper to sanitize the buffer when running as Xen
dom0.

When Xen supports HWP, this serves as the equivalent of commit
a21211672c9a ("ACPI / processor: Request native thermal interrupt
handling via _OSC") to avoid SMM crashes.  Xen will set bit
ACPI_PROC_CAP_COLLAB_PROC_PERF (bit 12) in the capability bits and the
_OSC/_PDC call will apply it.

[ jandryuk: Mention Xen HWP's need.  Support _OSC & _PDC ]
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
Reviewed-by: Michal Wilczynski <michal.wilczynski@intel.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20231108212517.72279-1-jandryuk@gmail.com
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-13 07:22:00 +01:00
Juergen Gross
e64e7c74b9 xen/events: avoid using info_for_irq() in xen_send_IPI_one()
xen_send_IPI_one() is being used by cpuhp_report_idle_dead() after
it calls rcu_report_dead(), meaning that any RCU usage by
xen_send_IPI_one() is a bad idea.

Unfortunately xen_send_IPI_one() is using notify_remote_via_irq()
today, which is using irq_get_chip_data() via info_for_irq(). And
irq_get_chip_data() in turn is using a maple-tree lookup requiring
RCU.

Avoid this problem by caching the ipi event channels in another
percpu variable, allowing the use notify_remote_via_evtchn() in
xen_send_IPI_one().

Fixes: 721255b9826b ("genirq: Use a maple tree for interrupt descriptor management")
Reported-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Juergen Gross <jgross@suse.com>
Tested-by: David Woodhouse <dwmw@amazon.co.uk>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
2023-11-13 07:22:00 +01:00
Jan Kiszka
2bd5b559a1 net: ti: icssg-prueth: Fix error cleanup on failing pruss_request_mem_region
We were just continuing in this case, surely not desired.

Fixes: 128d5874c082 ("net: ti: icssg-prueth: Add ICSSG ethernet driver")
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-11 19:49:21 +00:00
Jan Kiszka
e409d73466 net: ti: icssg-prueth: Add missing icss_iep_put to error path
Analogously to prueth_remove, just also taking care for NULL'ing the
iep pointers.

Fixes: 186734c15886 ("net: ti: icssg-prueth: add packet timestamping and ptp support")
Fixes: 443a2367ba3c ("net: ti: icssg-prueth: am65x SR2.0 add 10M full duplex support")
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Reviewed-by: MD Danish Anwar <danishanwar@ti.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-11 19:48:37 +00:00
Shigeru Yoshida
719639853d tty: Fix uninit-value access in ppp_sync_receive()
KMSAN reported the following uninit-value access issue:

=====================================================
BUG: KMSAN: uninit-value in ppp_sync_input drivers/net/ppp/ppp_synctty.c:690 [inline]
BUG: KMSAN: uninit-value in ppp_sync_receive+0xdc9/0xe70 drivers/net/ppp/ppp_synctty.c:334
 ppp_sync_input drivers/net/ppp/ppp_synctty.c:690 [inline]
 ppp_sync_receive+0xdc9/0xe70 drivers/net/ppp/ppp_synctty.c:334
 tiocsti+0x328/0x450 drivers/tty/tty_io.c:2295
 tty_ioctl+0x808/0x1920 drivers/tty/tty_io.c:2694
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:871 [inline]
 __se_sys_ioctl+0x211/0x400 fs/ioctl.c:857
 __x64_sys_ioctl+0x97/0xe0 fs/ioctl.c:857
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0x6b

Uninit was created at:
 __alloc_pages+0x75d/0xe80 mm/page_alloc.c:4591
 __alloc_pages_node include/linux/gfp.h:238 [inline]
 alloc_pages_node include/linux/gfp.h:261 [inline]
 __page_frag_cache_refill+0x9a/0x2c0 mm/page_alloc.c:4691
 page_frag_alloc_align+0x91/0x5d0 mm/page_alloc.c:4722
 page_frag_alloc include/linux/gfp.h:322 [inline]
 __netdev_alloc_skb+0x215/0x6d0 net/core/skbuff.c:728
 netdev_alloc_skb include/linux/skbuff.h:3225 [inline]
 dev_alloc_skb include/linux/skbuff.h:3238 [inline]
 ppp_sync_input drivers/net/ppp/ppp_synctty.c:669 [inline]
 ppp_sync_receive+0x237/0xe70 drivers/net/ppp/ppp_synctty.c:334
 tiocsti+0x328/0x450 drivers/tty/tty_io.c:2295
 tty_ioctl+0x808/0x1920 drivers/tty/tty_io.c:2694
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:871 [inline]
 __se_sys_ioctl+0x211/0x400 fs/ioctl.c:857
 __x64_sys_ioctl+0x97/0xe0 fs/ioctl.c:857
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0x6b

CPU: 0 PID: 12950 Comm: syz-executor.1 Not tainted 6.6.0-14500-g1c41041124bd #10
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014
=====================================================

ppp_sync_input() checks the first 2 bytes of the data are PPP_ALLSTATIONS
and PPP_UI. However, if the data length is 1 and the first byte is
PPP_ALLSTATIONS, an access to an uninitialized value occurs when checking
PPP_UI. This patch resolves this issue by checking the data length.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-10 11:09:53 +00:00
Eric Dumazet
18f039428c ipvlan: add ipvlan_route_v6_outbound() helper
Inspired by syzbot reports using a stack of multiple ipvlan devices.

Reduce stack size needed in ipvlan_process_v6_outbound() by moving
the flowi6 struct used for the route lookup in an non inlined
helper. ipvlan_route_v6_outbound() needs 120 bytes on the stack,
immediately reclaimed.

Also make sure ipvlan_process_v4_outbound() is not inlined.

We might also have to lower MAX_NEST_DEV, because only syzbot uses
setups with more than four stacked devices.

BUG: TASK stack guard page was hit at ffffc9000e803ff8 (stack is ffffc9000e804000..ffffc9000e808000)
stack guard page: 0000 [#1] SMP KASAN
CPU: 0 PID: 13442 Comm: syz-executor.4 Not tainted 6.1.52-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023
RIP: 0010:kasan_check_range+0x4/0x2a0 mm/kasan/generic.c:188
Code: 48 01 c6 48 89 c7 e8 db 4e c1 03 31 c0 5d c3 cc 0f 0b eb 02 0f 0b b8 ea ff ff ff 5d c3 cc 00 00 cc cc 00 00 cc cc 55 48 89 e5 <41> 57 41 56 41 55 41 54 53 b0 01 48 85 f6 0f 84 a4 01 00 00 48 89
RSP: 0018:ffffc9000e804000 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff817e5bf2
RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffffffff887c6568
RBP: ffffc9000e804000 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: dffffc0000000001 R12: 1ffff92001d0080c
R13: dffffc0000000000 R14: ffffffff87e6b100 R15: 0000000000000000
FS: 00007fd0c55826c0(0000) GS:ffff8881f6800000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffc9000e803ff8 CR3: 0000000170ef7000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<#DF>
</#DF>
<TASK>
[<ffffffff81f281d1>] __kasan_check_read+0x11/0x20 mm/kasan/shadow.c:31
[<ffffffff817e5bf2>] instrument_atomic_read include/linux/instrumented.h:72 [inline]
[<ffffffff817e5bf2>] _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline]
[<ffffffff817e5bf2>] cpumask_test_cpu include/linux/cpumask.h:506 [inline]
[<ffffffff817e5bf2>] cpu_online include/linux/cpumask.h:1092 [inline]
[<ffffffff817e5bf2>] trace_lock_acquire include/trace/events/lock.h:24 [inline]
[<ffffffff817e5bf2>] lock_acquire+0xe2/0x590 kernel/locking/lockdep.c:5632
[<ffffffff8563221e>] rcu_lock_acquire+0x2e/0x40 include/linux/rcupdate.h:306
[<ffffffff8561464d>] rcu_read_lock include/linux/rcupdate.h:747 [inline]
[<ffffffff8561464d>] ip6_pol_route+0x15d/0x1440 net/ipv6/route.c:2221
[<ffffffff85618120>] ip6_pol_route_output+0x50/0x80 net/ipv6/route.c:2606
[<ffffffff856f65b5>] pol_lookup_func include/net/ip6_fib.h:584 [inline]
[<ffffffff856f65b5>] fib6_rule_lookup+0x265/0x620 net/ipv6/fib6_rules.c:116
[<ffffffff85618009>] ip6_route_output_flags_noref+0x2d9/0x3a0 net/ipv6/route.c:2638
[<ffffffff8561821a>] ip6_route_output_flags+0xca/0x340 net/ipv6/route.c:2651
[<ffffffff838bd5a3>] ip6_route_output include/net/ip6_route.h:100 [inline]
[<ffffffff838bd5a3>] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:473 [inline]
[<ffffffff838bd5a3>] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline]
[<ffffffff838bd5a3>] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline]
[<ffffffff838bd5a3>] ipvlan_queue_xmit+0xc33/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677
[<ffffffff838c2909>] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229
[<ffffffff84d03900>] netdev_start_xmit include/linux/netdevice.h:4966 [inline]
[<ffffffff84d03900>] xmit_one net/core/dev.c:3644 [inline]
[<ffffffff84d03900>] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660
[<ffffffff84d080e2>] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324
[<ffffffff855ce4cd>] dev_queue_xmit include/linux/netdevice.h:3067 [inline]
[<ffffffff855ce4cd>] neigh_hh_output include/net/neighbour.h:529 [inline]
[<ffffffff855ce4cd>] neigh_output include/net/neighbour.h:543 [inline]
[<ffffffff855ce4cd>] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139
[<ffffffff855b8616>] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline]
[<ffffffff855b8616>] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211
[<ffffffff855b7e3c>] NF_HOOK_COND include/linux/netfilter.h:298 [inline]
[<ffffffff855b7e3c>] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232
[<ffffffff8575d27f>] dst_output include/net/dst.h:444 [inline]
[<ffffffff8575d27f>] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161
[<ffffffff838bdae4>] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline]
[<ffffffff838bdae4>] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline]
[<ffffffff838bdae4>] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline]
[<ffffffff838bdae4>] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677
[<ffffffff838c2909>] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229
[<ffffffff84d03900>] netdev_start_xmit include/linux/netdevice.h:4966 [inline]
[<ffffffff84d03900>] xmit_one net/core/dev.c:3644 [inline]
[<ffffffff84d03900>] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660
[<ffffffff84d080e2>] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324
[<ffffffff855ce4cd>] dev_queue_xmit include/linux/netdevice.h:3067 [inline]
[<ffffffff855ce4cd>] neigh_hh_output include/net/neighbour.h:529 [inline]
[<ffffffff855ce4cd>] neigh_output include/net/neighbour.h:543 [inline]
[<ffffffff855ce4cd>] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139
[<ffffffff855b8616>] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline]
[<ffffffff855b8616>] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211
[<ffffffff855b7e3c>] NF_HOOK_COND include/linux/netfilter.h:298 [inline]
[<ffffffff855b7e3c>] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232
[<ffffffff8575d27f>] dst_output include/net/dst.h:444 [inline]
[<ffffffff8575d27f>] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161
[<ffffffff838bdae4>] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline]
[<ffffffff838bdae4>] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline]
[<ffffffff838bdae4>] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline]
[<ffffffff838bdae4>] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677
[<ffffffff838c2909>] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229
[<ffffffff84d03900>] netdev_start_xmit include/linux/netdevice.h:4966 [inline]
[<ffffffff84d03900>] xmit_one net/core/dev.c:3644 [inline]
[<ffffffff84d03900>] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660
[<ffffffff84d080e2>] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324
[<ffffffff855ce4cd>] dev_queue_xmit include/linux/netdevice.h:3067 [inline]
[<ffffffff855ce4cd>] neigh_hh_output include/net/neighbour.h:529 [inline]
[<ffffffff855ce4cd>] neigh_output include/net/neighbour.h:543 [inline]
[<ffffffff855ce4cd>] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139
[<ffffffff855b8616>] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline]
[<ffffffff855b8616>] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211
[<ffffffff855b7e3c>] NF_HOOK_COND include/linux/netfilter.h:298 [inline]
[<ffffffff855b7e3c>] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232
[<ffffffff8575d27f>] dst_output include/net/dst.h:444 [inline]
[<ffffffff8575d27f>] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161
[<ffffffff838bdae4>] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline]
[<ffffffff838bdae4>] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline]
[<ffffffff838bdae4>] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline]
[<ffffffff838bdae4>] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677
[<ffffffff838c2909>] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229
[<ffffffff84d03900>] netdev_start_xmit include/linux/netdevice.h:4966 [inline]
[<ffffffff84d03900>] xmit_one net/core/dev.c:3644 [inline]
[<ffffffff84d03900>] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660
[<ffffffff84d080e2>] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324
[<ffffffff855ce4cd>] dev_queue_xmit include/linux/netdevice.h:3067 [inline]
[<ffffffff855ce4cd>] neigh_hh_output include/net/neighbour.h:529 [inline]
[<ffffffff855ce4cd>] neigh_output include/net/neighbour.h:543 [inline]
[<ffffffff855ce4cd>] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139
[<ffffffff855b8616>] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline]
[<ffffffff855b8616>] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211
[<ffffffff855b7e3c>] NF_HOOK_COND include/linux/netfilter.h:298 [inline]
[<ffffffff855b7e3c>] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232
[<ffffffff8575d27f>] dst_output include/net/dst.h:444 [inline]
[<ffffffff8575d27f>] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161
[<ffffffff838bdae4>] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline]
[<ffffffff838bdae4>] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline]
[<ffffffff838bdae4>] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline]
[<ffffffff838bdae4>] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677
[<ffffffff838c2909>] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229
[<ffffffff84d03900>] netdev_start_xmit include/linux/netdevice.h:4966 [inline]
[<ffffffff84d03900>] xmit_one net/core/dev.c:3644 [inline]
[<ffffffff84d03900>] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660
[<ffffffff84d080e2>] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324
[<ffffffff84d4a65e>] dev_queue_xmit include/linux/netdevice.h:3067 [inline]
[<ffffffff84d4a65e>] neigh_resolve_output+0x64e/0x750 net/core/neighbour.c:1560
[<ffffffff855ce503>] neigh_output include/net/neighbour.h:545 [inline]
[<ffffffff855ce503>] ip6_finish_output2+0x1643/0x1ae0 net/ipv6/ip6_output.c:139
[<ffffffff855b8616>] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline]
[<ffffffff855b8616>] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211
[<ffffffff855b7e3c>] NF_HOOK_COND include/linux/netfilter.h:298 [inline]
[<ffffffff855b7e3c>] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232
[<ffffffff855b9ce4>] dst_output include/net/dst.h:444 [inline]
[<ffffffff855b9ce4>] NF_HOOK include/linux/netfilter.h:309 [inline]
[<ffffffff855b9ce4>] ip6_xmit+0x11a4/0x1b20 net/ipv6/ip6_output.c:352
[<ffffffff8597984e>] sctp_v6_xmit+0x9ae/0x1230 net/sctp/ipv6.c:250
[<ffffffff8594623e>] sctp_packet_transmit+0x25de/0x2bc0 net/sctp/output.c:653
[<ffffffff858f5142>] sctp_packet_singleton+0x202/0x310 net/sctp/outqueue.c:783
[<ffffffff858ea411>] sctp_outq_flush_ctrl net/sctp/outqueue.c:914 [inline]
[<ffffffff858ea411>] sctp_outq_flush+0x661/0x3d40 net/sctp/outqueue.c:1212
[<ffffffff858f02f9>] sctp_outq_uncork+0x79/0xb0 net/sctp/outqueue.c:764
[<ffffffff8589f060>] sctp_side_effects net/sctp/sm_sideeffect.c:1199 [inline]
[<ffffffff8589f060>] sctp_do_sm+0x55c0/0x5c30 net/sctp/sm_sideeffect.c:1170
[<ffffffff85941567>] sctp_primitive_ASSOCIATE+0x97/0xc0 net/sctp/primitive.c:73
[<ffffffff859408b2>] sctp_sendmsg_to_asoc+0xf62/0x17b0 net/sctp/socket.c:1839
[<ffffffff85910b5e>] sctp_sendmsg+0x212e/0x33b0 net/sctp/socket.c:2029
[<ffffffff8544d559>] inet_sendmsg+0x149/0x310 net/ipv4/af_inet.c:849
[<ffffffff84c6c4d2>] sock_sendmsg_nosec net/socket.c:716 [inline]
[<ffffffff84c6c4d2>] sock_sendmsg net/socket.c:736 [inline]
[<ffffffff84c6c4d2>] ____sys_sendmsg+0x572/0x8c0 net/socket.c:2504
[<ffffffff84c6ca91>] ___sys_sendmsg net/socket.c:2558 [inline]
[<ffffffff84c6ca91>] __sys_sendmsg+0x271/0x360 net/socket.c:2587
[<ffffffff84c6cbff>] __do_sys_sendmsg net/socket.c:2596 [inline]
[<ffffffff84c6cbff>] __se_sys_sendmsg net/socket.c:2594 [inline]
[<ffffffff84c6cbff>] __x64_sys_sendmsg+0x7f/0x90 net/socket.c:2594
[<ffffffff85b32553>] do_syscall_x64 arch/x86/entry/common.c:51 [inline]
[<ffffffff85b32553>] do_syscall_64+0x53/0x80 arch/x86/entry/common.c:84
[<ffffffff85c00087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd

Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-10 10:59:22 +00:00
Ravi Gunasekaran
cbe9e68e1e MAINTAINERS: net: Update reviewers for TI's Ethernet drivers
Grygorii is no longer associated with TI and messages addressed to
him bounce.

Add Siddharth, Roger and myself as reviewers.

Signed-off-by: Ravi Gunasekaran <r-gunasekaran@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-10 10:57:52 +00:00
Stanislav Fomichev
871019b22d net: set SOCK_RCU_FREE before inserting socket into hashtable
We've started to see the following kernel traces:

 WARNING: CPU: 83 PID: 0 at net/core/filter.c:6641 sk_lookup+0x1bd/0x1d0

 Call Trace:
  <IRQ>
  __bpf_skc_lookup+0x10d/0x120
  bpf_sk_lookup+0x48/0xd0
  bpf_sk_lookup_tcp+0x19/0x20
  bpf_prog_<redacted>+0x37c/0x16a3
  cls_bpf_classify+0x205/0x2e0
  tcf_classify+0x92/0x160
  __netif_receive_skb_core+0xe52/0xf10
  __netif_receive_skb_list_core+0x96/0x2b0
  napi_complete_done+0x7b5/0xb70
  <redacted>_poll+0x94/0xb0
  net_rx_action+0x163/0x1d70
  __do_softirq+0xdc/0x32e
  asm_call_irq_on_stack+0x12/0x20
  </IRQ>
  do_softirq_own_stack+0x36/0x50
  do_softirq+0x44/0x70

__inet_hash can race with lockless (rcu) readers on the other cpus:

  __inet_hash
    __sk_nulls_add_node_rcu
    <- (bpf triggers here)
    sock_set_flag(SOCK_RCU_FREE)

Let's move the SOCK_RCU_FREE part up a bit, before we are inserting
the socket into hashtables. Note, that the race is really harmless;
the bpf callers are handling this situation (where listener socket
doesn't have SOCK_RCU_FREE set) correctly, so the only
annoyance is a WARN_ONCE.

More details from Eric regarding SOCK_RCU_FREE timeline:

Commit 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under
synflood") added SOCK_RCU_FREE. At that time, the precise location of
sock_set_flag(sk, SOCK_RCU_FREE) did not matter, because the thread calling
__inet_hash() owns a reference on sk. SOCK_RCU_FREE was only tested
at dismantle time.

Commit 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
started checking SOCK_RCU_FREE _after_ the lookup to infer whether
the refcount has been taken care of.

Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-10 08:37:09 +00:00
Yuran Pereira
8a4f030dbc ptp: Fixes a null pointer dereference in ptp_ioctl
Syzkaller found a null pointer dereference in ptp_ioctl
originating from the lack of a null check for tsevq.

```
general protection fault, probably for non-canonical
	address 0xdffffc000000020b: 0000 [#1] PREEMPT SMP KASAN
KASAN: probably user-memory-access in range
	[0x0000000000001058-0x000000000000105f]
CPU: 0 PID: 5053 Comm: syz-executor353 Not tainted
	6.6.0-syzkaller-10396-g4652b8e4f3ff #0
Hardware name: Google Google Compute Engine/Google Compute Engine,
	BIOS Google 10/09/2023
RIP: 0010:ptp_ioctl+0xcb7/0x1d10 drivers/ptp/ptp_chardev.c:476
...
Call Trace:
 <TASK>
 posix_clock_ioctl+0xf8/0x160 kernel/time/posix-clock.c:86
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:871 [inline]
 __se_sys_ioctl fs/ioctl.c:857 [inline]
 __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0x6b
```

This patch fixes the issue by adding a check for tsevq and
ensuring ptp_ioctl returns with an error if tsevq is null.

Reported-by: syzbot+8a78ecea7ac1a2ea26e5@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=8a78ecea7ac1a2ea26e5
Fixes: c5a445b1e934 ("ptp: support event queue reader channel masks")
Signed-off-by: Yuran Pereira <yuran.pereira@hotmail.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-11-10 08:35:59 +00:00
Andrii Nakryiko
e2e57d637a selftests/bpf: add more test cases for check_cfg()
Add a few more simple cases to validate proper privileged vs unprivileged
loop detection behavior. conditional_loop2 is the one reported by Hao
Sun that triggered this set of fixes.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Suggested-by: Hao Sun <sunhao.th@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231110061412.2995786-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-11-09 22:57:25 -08:00
Andrii Nakryiko
10e14e9652 bpf: fix control-flow graph checking in privileged mode
When BPF program is verified in privileged mode, BPF verifier allows
bounded loops. This means that from CFG point of view there are
definitely some back-edges. Original commit adjusted check_cfg() logic
to not detect back-edges in control flow graph if they are resulting
from conditional jumps, which the idea that subsequent full BPF
verification process will determine whether such loops are bounded or
not, and either accept or reject the BPF program. At least that's my
reading of the intent.

Unfortunately, the implementation of this idea doesn't work correctly in
all possible situations. Conditional jump might not result in immediate
back-edge, but just a few unconditional instructions later we can arrive
at back-edge. In such situations check_cfg() would reject BPF program
even in privileged mode, despite it might be bounded loop. Next patch
adds one simple program demonstrating such scenario.

To keep things simple, instead of trying to detect back edges in
privileged mode, just assume every back edge is valid and let subsequent
BPF verification prove or reject bounded loops.

Note a few test changes. For unknown reason, we have a few tests that
are specified to detect a back-edge in a privileged mode, but looking at
their code it seems like the right outcome is passing check_cfg() and
letting subsequent verification to make a decision about bounded or not
bounded looping.

Bounded recursion case is also interesting. The example should pass, as
recursion is limited to just a few levels and so we never reach maximum
number of nested frames and never exhaust maximum stack depth. But the
way that max stack depth logic works today it falsely detects this as
exceeding max nested frame count. This patch series doesn't attempt to
fix this orthogonal problem, so we just adjust expected verifier failure.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Fixes: 2589726d12a1 ("bpf: introduce bounded loops")
Reported-by: Hao Sun <sunhao.th@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231110061412.2995786-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-11-09 22:57:24 -08:00
Alexei Starovoitov
8c74b27f4b Merge branch 'bpf-control-flow-graph-and-precision-backtrack-fixes'
Andrii Nakryiko says:

====================
BPF control flow graph and precision backtrack fixes

A small fix to BPF verifier's CFG logic around handling and reporting ldimm64
instructions. Patch #1 was previously submitted separately ([0]), and so this
patch set supersedes that patch.

Second patch is fixing obscure corner case in mark_chain_precise() logic. See
patch for details. Patch #3 adds a dedicated test, however fragile it might.

  [0] https://patchwork.kernel.org/project/netdevbpf/patch/20231101205626.119243-1-andrii@kernel.org/
====================

Link: https://lore.kernel.org/r/20231110002638.4168352-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-11-09 20:11:20 -08:00
Andrii Nakryiko
62ccdb11d3 selftests/bpf: add edge case backtracking logic test
Add a dedicated selftests to try to set up conditions to have a state
with same first and last instruction index, but it actually is a loop
3->4->1->2->3. This confuses mark_chain_precision() if verifier doesn't
take into account jump history.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231110002638.4168352-4-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-11-09 20:11:20 -08:00
Andrii Nakryiko
4bb7ea946a bpf: fix precision backtracking instruction iteration
Fix an edge case in __mark_chain_precision() which prematurely stops
backtracking instructions in a state if it happens that state's first
and last instruction indexes are the same. This situations doesn't
necessarily mean that there were no instructions simulated in a state,
but rather that we starting from the instruction, jumped around a bit,
and then ended up at the same instruction before checkpointing or
marking precision.

To distinguish between these two possible situations, we need to consult
jump history. If it's empty or contain a single record "bridging" parent
state and first instruction of processed state, then we indeed
backtracked all instructions in this state. But if history is not empty,
we are definitely not done yet.

Move this logic inside get_prev_insn_idx() to contain it more nicely.
Use -ENOENT return code to denote "we are out of instructions"
situation.

This bug was exposed by verifier_loop1.c's bounded_recursion subtest, once
the next fix in this patch set is applied.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231110002638.4168352-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-11-09 20:11:20 -08:00
Andrii Nakryiko
3feb263bb5 bpf: handle ldimm64 properly in check_cfg()
ldimm64 instructions are 16-byte long, and so have to be handled
appropriately in check_cfg(), just like the rest of BPF verifier does.

This has implications in three places:
  - when determining next instruction for non-jump instructions;
  - when determining next instruction for callback address ldimm64
    instructions (in visit_func_call_insn());
  - when checking for unreachable instructions, where second half of
    ldimm64 is expected to be unreachable;

We take this also as an opportunity to report jump into the middle of
ldimm64. And adjust few test_verifier tests accordingly.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Reported-by: Hao Sun <sunhao.th@gmail.com>
Fixes: 475fb78fbf48 ("bpf: verifier (add branch/goto checks)")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231110002638.4168352-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-11-09 20:11:20 -08:00
Anders Roxell
fe69a1b1b6 selftests: bpf: xskxceiver: ksft_print_msg: fix format type error
Crossbuilding selftests/bpf for architecture arm64, format specifies
type error show up like.

xskxceiver.c:912:34: error: format specifies type 'int' but the argument
has type '__u64' (aka 'unsigned long long') [-Werror,-Wformat]
 ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
                                                                ~~
                                                                %llu
                __func__, pkt->pkt_nb, meta->count);
                                       ^~~~~~~~~~~
xskxceiver.c:929:55: error: format specifies type 'unsigned long long' but
 the argument has type 'u64' (aka 'unsigned long') [-Werror,-Wformat]
 ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len);
                                    ~~~~             ^~~~

Fixing the issues by casting to (unsigned long long) and changing the
specifiers to be %llu from %d and %u, since with u64s it might be %llx
or %lx, depending on architecture.

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Link: https://lore.kernel.org/r/20231109174328.1774571-1-anders.roxell@linaro.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-11-09 19:18:12 -08:00
Dan Carpenter
e07754e0a1 vhost-vdpa: fix use after free in vhost_vdpa_probe()
The put_device() calls vhost_vdpa_release_dev() which calls
ida_simple_remove() and frees "v".  So this call to
ida_simple_remove() is a use after free and a double free.

Fixes: ebe6a354fa7e ("vhost-vdpa: Call ida_simple_remove() when failed")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Message-Id: <cf53cb61-0699-4e36-a980-94fd4268ff00@moroto.mountain>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2023-11-01 09:31:16 -04:00
Jakub Sitnicki
b2c8b644fa virtio_pci: Switch away from deprecated irq_set_affinity_hint
Since commit 65c7cdedeb30 ("genirq: Provide new interfaces for affinity
hints") irq_set_affinity_hint is being phased out.

Switch to new interfaces for setting and applying irq affinity hints.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Message-Id: <20231025145319.380775-1-jakub@cloudflare.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
2023-11-01 09:31:16 -04:00
Björn Töpel
f2de37a572 riscv, qemu_fw_cfg: Add support for RISC-V architecture
Qemu fw_cfg support was missing for RISC-V, which made it hard to do
proper vmcore dumps from qemu.

Add the missing RISC-V arch-defines.

You can now do vmcore dumps from qemu. Add "-device vmcoreinfo" to the
qemu command-line. From the qemu monitor:
  (qemu) dump-guest-memory vmcore

The vmcore can now be used, e.g., with the "crash" utility.

Acked-by: "Michael S. Tsirkin" <mst@redhat.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Tested-by: Clément Léger <cleger@rivosinc.com>
Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
Message-Id: <20231012102852.234442-1-bjorn@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-11-01 09:31:16 -04:00
Stefano Garzarella
0d82410252 vdpa_sim_blk: allocate the buffer zeroed
Deleting and recreating a device can lead to having the same
content as the old device, so let's always allocate buffers
completely zeroed out.

Fixes: abebb16254b3 ("vdpa_sim_blk: support shared backend")
Suggested-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-Id: <20231031144339.121453-1-sgarzare@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2023-11-01 09:31:16 -04:00
Michael S. Tsirkin
3503895788 virtio_pci: move structure to a header
These are guest/host interfaces, so they belong in the header where e.g.
qemu will know to find them.

Note: we added a new structure as opposed to extending existing one
because someone might be relying on the size of the existing structure
staying unchanged.  Add a warning to avoid using sizeof.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
2023-11-01 09:31:16 -04:00
96 changed files with 1237 additions and 808 deletions

View File

@ -275,12 +275,12 @@ allOf:
properties:
rx-internal-delay-ps:
description:
RGMII Receive Clock Delay defined in pico seconds.This is used for
RGMII Receive Clock Delay defined in pico seconds. This is used for
controllers that have configurable RX internal delays. If this
property is present then the MAC applies the RX delay.
tx-internal-delay-ps:
description:
RGMII Transmit Clock Delay defined in pico seconds.This is used for
RGMII Transmit Clock Delay defined in pico seconds. This is used for
controllers that have configurable TX internal delays. If this
property is present then the MAC applies the TX delay.

View File

@ -21768,7 +21768,9 @@ F: Documentation/devicetree/bindings/counter/ti-eqep.yaml
F: drivers/counter/ti-eqep.c
TI ETHERNET SWITCH DRIVER (CPSW)
R: Grygorii Strashko <grygorii.strashko@ti.com>
R: Siddharth Vadapalli <s-vadapalli@ti.com>
R: Ravi Gunasekaran <r-gunasekaran@ti.com>
R: Roger Quadros <rogerq@kernel.org>
L: linux-omap@vger.kernel.org
L: netdev@vger.kernel.org
S: Maintained
@ -21792,6 +21794,15 @@ F: Documentation/devicetree/bindings/media/i2c/ti,ds90*
F: drivers/media/i2c/ds90*
F: include/media/i2c/ds90*
TI ICSSG ETHERNET DRIVER (ICSSG)
R: MD Danish Anwar <danishanwar@ti.com>
R: Roger Quadros <rogerq@kernel.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/ti,icss*.yaml
F: drivers/net/ethernet/ti/icssg/*
TI J721E CSI2RX DRIVER
M: Jai Luthra <j-luthra@ti.com>
L: linux-media@vger.kernel.org

View File

@ -16,6 +16,9 @@
#include <asm/x86_init.h>
#include <asm/cpufeature.h>
#include <asm/irq_vectors.h>
#include <asm/xen/hypervisor.h>
#include <xen/xen.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
@ -127,6 +130,17 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
if (!cpu_has(c, X86_FEATURE_MWAIT) ||
boot_option_idle_override == IDLE_NOMWAIT)
*cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH);
if (xen_initial_domain()) {
/*
* When Linux is running as Xen dom0, the hypervisor is the
* entity in charge of the processor power management, and so
* Xen needs to check the OS capabilities reported in the
* processor capabilities buffer matches what the hypervisor
* driver supports.
*/
xen_sanitize_proc_cap_bits(cap);
}
}
static inline bool acpi_has_cpu_in_madt(void)

View File

@ -100,4 +100,13 @@ static inline void leave_lazy(enum xen_lazy_mode mode)
enum xen_lazy_mode xen_get_lazy_mode(void);
#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI)
void xen_sanitize_proc_cap_bits(uint32_t *buf);
#else
static inline void xen_sanitize_proc_cap_bits(uint32_t *buf)
{
BUG();
}
#endif
#endif /* _ASM_X86_XEN_HYPERVISOR_H */

View File

@ -131,7 +131,7 @@ config RASPBERRYPI_FIRMWARE
config FW_CFG_SYSFS
tristate "QEMU fw_cfg device support in sysfs"
depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || SPARC || X86)
depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || RISCV || SPARC || X86)
depends on HAS_IOPORT_MAP
default n
help

View File

@ -211,7 +211,7 @@ static void fw_cfg_io_cleanup(void)
/* arch-specific ctrl & data register offsets are not available in ACPI, DT */
#if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF))
# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV))
# define FW_CFG_CTRL_OFF 0x08
# define FW_CFG_DATA_OFF 0x00
# define FW_CFG_DMA_OFF 0x10

View File

@ -1500,6 +1500,10 @@ done:
static void bond_setup_by_slave(struct net_device *bond_dev,
struct net_device *slave_dev)
{
bool was_up = !!(bond_dev->flags & IFF_UP);
dev_close(bond_dev);
bond_dev->header_ops = slave_dev->header_ops;
bond_dev->type = slave_dev->type;
@ -1514,6 +1518,8 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
}
if (was_up)
dev_open(bond_dev, NULL);
}
/* On bonding slaves other than the currently active slave, suppress

View File

@ -146,7 +146,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data)
}
queue_work(pdsc->wq, &qcq->work);
pds_core_intr_mask(&pdsc->intr_ctrl[irq], PDS_CORE_INTR_MASK_CLEAR);
pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR);
return IRQ_HANDLED;
}

View File

@ -15,7 +15,7 @@
#define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver"
#define PDSC_WATCHDOG_SECS 5
#define PDSC_QUEUE_NAME_MAX_SZ 32
#define PDSC_QUEUE_NAME_MAX_SZ 16
#define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */
#define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */
#define PDSC_TEARDOWN_RECOVERY false

View File

@ -261,10 +261,14 @@ static int pdsc_identify(struct pdsc *pdsc)
struct pds_core_drv_identity drv = {};
size_t sz;
int err;
int n;
drv.drv_type = cpu_to_le32(PDS_DRIVER_LINUX);
snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str),
"%s %s", PDS_CORE_DRV_NAME, utsname()->release);
/* Catching the return quiets a Wformat-truncation complaint */
n = snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str),
"%s %s", PDS_CORE_DRV_NAME, utsname()->release);
if (n > sizeof(drv.driver_ver_str))
dev_dbg(pdsc->dev, "release name truncated, don't care\n");
/* Next let's get some info about the device
* We use the devcmd_lock at this level in order to

View File

@ -104,7 +104,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
struct pds_core_fw_list_info fw_list;
struct pdsc *pdsc = devlink_priv(dl);
union pds_core_dev_comp comp;
char buf[16];
char buf[32];
int listlen;
int err;
int i;

View File

@ -6889,7 +6889,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
desc_idx, *post_ptr);
drop_it_no_recycle:
/* Other statistics kept track of by card. */
tp->rx_dropped++;
tnapi->rx_dropped++;
goto next_pkt;
}
@ -7918,8 +7918,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
segs = skb_gso_segment(skb, tp->dev->features &
~(NETIF_F_TSO | NETIF_F_TSO6));
if (IS_ERR(segs) || !segs)
if (IS_ERR(segs) || !segs) {
tnapi->tx_dropped++;
goto tg3_tso_bug_end;
}
skb_list_walk_safe(segs, seg, next) {
skb_mark_not_on_list(seg);
@ -8190,7 +8192,7 @@ dma_error:
drop:
dev_kfree_skb_any(skb);
drop_nofree:
tp->tx_dropped++;
tnapi->tx_dropped++;
return NETDEV_TX_OK;
}
@ -9405,7 +9407,7 @@ static void __tg3_set_rx_mode(struct net_device *);
/* tp->lock is held. */
static int tg3_halt(struct tg3 *tp, int kind, bool silent)
{
int err;
int err, i;
tg3_stop_fw(tp);
@ -9426,6 +9428,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent)
/* And make sure the next sample is new data */
memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) {
struct tg3_napi *tnapi = &tp->napi[i];
tnapi->rx_dropped = 0;
tnapi->tx_dropped = 0;
}
}
return err;
@ -11975,6 +11984,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
{
struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev;
struct tg3_hw_stats *hw_stats = tp->hw_stats;
unsigned long rx_dropped;
unsigned long tx_dropped;
int i;
stats->rx_packets = old_stats->rx_packets +
get_stat64(&hw_stats->rx_ucast_packets) +
@ -12021,8 +12033,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
stats->rx_missed_errors = old_stats->rx_missed_errors +
get_stat64(&hw_stats->rx_discards);
stats->rx_dropped = tp->rx_dropped;
stats->tx_dropped = tp->tx_dropped;
/* Aggregate per-queue counters. The per-queue counters are updated
* by a single writer, race-free. The result computed by this loop
* might not be 100% accurate (counters can be updated in the middle of
* the loop) but the next tg3_get_nstats() will recompute the current
* value so it is acceptable.
*
* Note that these counters wrap around at 4G on 32bit machines.
*/
rx_dropped = (unsigned long)(old_stats->rx_dropped);
tx_dropped = (unsigned long)(old_stats->tx_dropped);
for (i = 0; i < tp->irq_cnt; i++) {
struct tg3_napi *tnapi = &tp->napi[i];
rx_dropped += tnapi->rx_dropped;
tx_dropped += tnapi->tx_dropped;
}
stats->rx_dropped = rx_dropped;
stats->tx_dropped = tx_dropped;
}
static int tg3_get_regs_len(struct net_device *dev)

View File

@ -3018,6 +3018,7 @@ struct tg3_napi {
u16 *rx_rcb_prod_idx;
struct tg3_rx_prodring_set prodring;
struct tg3_rx_buffer_desc *rx_rcb;
unsigned long rx_dropped;
u32 tx_prod ____cacheline_aligned;
u32 tx_cons;
@ -3026,6 +3027,7 @@ struct tg3_napi {
u32 prodmbox;
struct tg3_tx_buffer_desc *tx_ring;
struct tg3_tx_ring_info *tx_buffers;
unsigned long tx_dropped;
dma_addr_t status_mapping;
dma_addr_t rx_rcb_mapping;
@ -3220,8 +3222,6 @@ struct tg3 {
/* begin "everything else" cacheline(s) section */
unsigned long rx_dropped;
unsigned long tx_dropped;
struct rtnl_link_stats64 net_stats_prev;
struct tg3_ethtool_stats estats_prev;

View File

@ -432,8 +432,8 @@ static const struct gmac_max_framelen gmac_maxlens[] = {
.val = CONFIG0_MAXLEN_1536,
},
{
.max_l3_len = 1542,
.val = CONFIG0_MAXLEN_1542,
.max_l3_len = 1548,
.val = CONFIG0_MAXLEN_1548,
},
{
.max_l3_len = 9212,
@ -1145,6 +1145,7 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
dma_addr_t mapping;
unsigned short mtu;
void *buffer;
int ret;
mtu = ETH_HLEN;
mtu += netdev->mtu;
@ -1159,9 +1160,30 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
word3 |= mtu;
}
if (skb->ip_summed != CHECKSUM_NONE) {
if (skb->len >= ETH_FRAME_LEN) {
/* Hardware offloaded checksumming isn't working on frames
* bigger than 1514 bytes. A hypothesis about this is that the
* checksum buffer is only 1518 bytes, so when the frames get
* bigger they get truncated, or the last few bytes get
* overwritten by the FCS.
*
* Just use software checksumming and bypass on bigger frames.
*/
if (skb->ip_summed == CHECKSUM_PARTIAL) {
ret = skb_checksum_help(skb);
if (ret)
return ret;
}
word1 |= TSS_BYPASS_BIT;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
int tcp = 0;
/* We do not switch off the checksumming on non TCP/UDP
* frames: as is shown from tests, the checksumming engine
* is smart enough to see that a frame is not actually TCP
* or UDP and then just pass it through without any changes
* to the frame.
*/
if (skb->protocol == htons(ETH_P_IP)) {
word1 |= TSS_IP_CHKSUM_BIT;
tcp = ip_hdr(skb)->protocol == IPPROTO_TCP;
@ -1978,15 +2000,6 @@ static int gmac_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
static netdev_features_t gmac_fix_features(struct net_device *netdev,
netdev_features_t features)
{
if (netdev->mtu + ETH_HLEN + VLAN_HLEN > MTU_SIZE_BIT_MASK)
features &= ~GMAC_OFFLOAD_FEATURES;
return features;
}
static int gmac_set_features(struct net_device *netdev,
netdev_features_t features)
{
@ -2212,7 +2225,6 @@ static const struct net_device_ops gmac_351x_ops = {
.ndo_set_mac_address = gmac_set_mac_address,
.ndo_get_stats64 = gmac_get_stats64,
.ndo_change_mtu = gmac_change_mtu,
.ndo_fix_features = gmac_fix_features,
.ndo_set_features = gmac_set_features,
};
@ -2464,11 +2476,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
netdev->hw_features = GMAC_OFFLOAD_FEATURES;
netdev->features |= GMAC_OFFLOAD_FEATURES | NETIF_F_GRO;
/* We can handle jumbo frames up to 10236 bytes so, let's accept
* payloads of 10236 bytes minus VLAN and ethernet header
/* We can receive jumbo frames up to 10236 bytes but only
* transmit 2047 bytes so, let's accept payloads of 2047
* bytes minus VLAN and ethernet header
*/
netdev->min_mtu = ETH_MIN_MTU;
netdev->max_mtu = 10236 - VLAN_ETH_HLEN;
netdev->max_mtu = MTU_SIZE_BIT_MASK - VLAN_ETH_HLEN;
port->freeq_refill = 0;
netif_napi_add(netdev, &port->napi, gmac_napi_poll);

View File

@ -502,7 +502,7 @@ union gmac_txdesc_3 {
#define SOF_BIT 0x80000000
#define EOF_BIT 0x40000000
#define EOFIE_BIT BIT(29)
#define MTU_SIZE_BIT_MASK 0x1fff
#define MTU_SIZE_BIT_MASK 0x7ff /* Max MTU 2047 bytes */
/* GMAC Tx Descriptor */
struct gmac_txdesc {
@ -787,7 +787,7 @@ union gmac_config0 {
#define CONFIG0_MAXLEN_1536 0
#define CONFIG0_MAXLEN_1518 1
#define CONFIG0_MAXLEN_1522 2
#define CONFIG0_MAXLEN_1542 3
#define CONFIG0_MAXLEN_1548 3
#define CONFIG0_MAXLEN_9k 4 /* 9212 */
#define CONFIG0_MAXLEN_10k 5 /* 10236 */
#define CONFIG0_MAXLEN_1518__6 6

View File

@ -254,10 +254,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
if (block->tx) {
if (block->tx->q_num < priv->tx_cfg.num_queues)
reschedule |= gve_tx_poll(block, budget);
else
else if (budget)
reschedule |= gve_xdp_poll(block, budget);
}
if (!budget)
return 0;
if (block->rx) {
work_done = gve_rx_poll(block, budget);
reschedule |= work_done == budget;
@ -298,6 +301,9 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
if (block->tx)
reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
if (!budget)
return 0;
if (block->rx) {
work_done = gve_rx_poll_dqo(block, budget);
reschedule |= work_done == budget;

View File

@ -1007,10 +1007,6 @@ int gve_rx_poll(struct gve_notify_block *block, int budget)
feat = block->napi.dev->features;
/* If budget is 0, do all the work */
if (budget == 0)
budget = INT_MAX;
if (budget > 0)
work_done = gve_clean_rx_done(rx, budget, feat);

View File

@ -925,10 +925,6 @@ bool gve_xdp_poll(struct gve_notify_block *block, int budget)
bool repoll;
u32 to_do;
/* If budget is 0, do all the work */
if (budget == 0)
budget = INT_MAX;
/* Find out how much work there is to be done */
nic_done = gve_tx_load_event_counter(priv, tx);
to_do = min_t(u32, (nic_done - tx->done), budget);

View File

@ -503,11 +503,14 @@ static void hns3_get_coal_info(struct hns3_enet_tqp_vector *tqp_vector,
}
sprintf(result[j++], "%d", i);
sprintf(result[j++], "%s", dim_state_str[dim->state]);
sprintf(result[j++], "%s", dim->state < ARRAY_SIZE(dim_state_str) ?
dim_state_str[dim->state] : "unknown");
sprintf(result[j++], "%u", dim->profile_ix);
sprintf(result[j++], "%s", dim_cqe_mode_str[dim->mode]);
sprintf(result[j++], "%s", dim->mode < ARRAY_SIZE(dim_cqe_mode_str) ?
dim_cqe_mode_str[dim->mode] : "unknown");
sprintf(result[j++], "%s",
dim_tune_stat_str[dim->tune_state]);
dim->tune_state < ARRAY_SIZE(dim_tune_stat_str) ?
dim_tune_stat_str[dim->tune_state] : "unknown");
sprintf(result[j++], "%u", dim->steps_left);
sprintf(result[j++], "%u", dim->steps_right);
sprintf(result[j++], "%u", dim->tired);

View File

@ -5139,7 +5139,7 @@ static int hns3_init_mac_addr(struct net_device *netdev)
struct hns3_nic_priv *priv = netdev_priv(netdev);
char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
struct hnae3_handle *h = priv->ae_handle;
u8 mac_addr_temp[ETH_ALEN];
u8 mac_addr_temp[ETH_ALEN] = {0};
int ret = 0;
if (h->ae_algo->ops->get_mac_addr)

View File

@ -61,6 +61,7 @@ static void hclge_sync_fd_table(struct hclge_dev *hdev);
static void hclge_update_fec_stats(struct hclge_dev *hdev);
static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
int wait_cnt);
static int hclge_update_port_info(struct hclge_dev *hdev);
static struct hnae3_ae_algo ae_algo;
@ -3041,6 +3042,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
if (state != hdev->hw.mac.link) {
hdev->hw.mac.link = state;
if (state == HCLGE_LINK_STATUS_UP)
hclge_update_port_info(hdev);
client->ops->link_status_change(handle, state);
hclge_config_mac_tnl_int(hdev, state);
if (rclient && rclient->ops->link_status_change)
@ -10025,8 +10029,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
struct hclge_vport_vlan_cfg *vlan, *tmp;
struct hclge_dev *hdev = vport->back;
mutex_lock(&hdev->vport_lock);
list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
if (vlan->vlan_id == vlan_id) {
if (is_write_tbl && vlan->hd_tbl_status)
@ -10041,8 +10043,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
break;
}
}
mutex_unlock(&hdev->vport_lock);
}
void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
@ -10451,11 +10451,16 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
* handle mailbox. Just record the vlan id, and remove it after
* reset finished.
*/
mutex_lock(&hdev->vport_lock);
if ((test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) && is_kill) {
set_bit(vlan_id, vport->vlan_del_fail_bmap);
mutex_unlock(&hdev->vport_lock);
return -EBUSY;
} else if (!is_kill && test_bit(vlan_id, vport->vlan_del_fail_bmap)) {
clear_bit(vlan_id, vport->vlan_del_fail_bmap);
}
mutex_unlock(&hdev->vport_lock);
/* when port base vlan enabled, we use port base vlan as the vlan
* filter entry. In this case, we don't update vlan filter table
@ -10470,17 +10475,22 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
}
if (!ret) {
if (!is_kill)
if (!is_kill) {
hclge_add_vport_vlan_table(vport, vlan_id,
writen_to_tbl);
else if (is_kill && vlan_id != 0)
} else if (is_kill && vlan_id != 0) {
mutex_lock(&hdev->vport_lock);
hclge_rm_vport_vlan_table(vport, vlan_id, false);
mutex_unlock(&hdev->vport_lock);
}
} else if (is_kill) {
/* when remove hw vlan filter failed, record the vlan id,
* and try to remove it from hw later, to be consistence
* with stack
*/
mutex_lock(&hdev->vport_lock);
set_bit(vlan_id, vport->vlan_del_fail_bmap);
mutex_unlock(&hdev->vport_lock);
}
hclge_set_vport_vlan_fltr_change(vport);
@ -10520,6 +10530,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
int i, ret, sync_cnt = 0;
u16 vlan_id;
mutex_lock(&hdev->vport_lock);
/* start from vport 1 for PF is always alive */
for (i = 0; i < hdev->num_alloc_vport; i++) {
struct hclge_vport *vport = &hdev->vport[i];
@ -10530,21 +10541,26 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
vport->vport_id, vlan_id,
true);
if (ret && ret != -EINVAL)
if (ret && ret != -EINVAL) {
mutex_unlock(&hdev->vport_lock);
return;
}
clear_bit(vlan_id, vport->vlan_del_fail_bmap);
hclge_rm_vport_vlan_table(vport, vlan_id, false);
hclge_set_vport_vlan_fltr_change(vport);
sync_cnt++;
if (sync_cnt >= HCLGE_MAX_SYNC_COUNT)
if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) {
mutex_unlock(&hdev->vport_lock);
return;
}
vlan_id = find_first_bit(vport->vlan_del_fail_bmap,
VLAN_N_VID);
}
}
mutex_unlock(&hdev->vport_lock);
hclge_sync_vlan_fltr_state(hdev);
}
@ -11651,6 +11667,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
goto err_msi_irq_uninit;
if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
clear_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
if (hnae3_dev_phy_imp_supported(hdev))
ret = hclge_update_tp_port_info(hdev);
else

View File

@ -1206,6 +1206,8 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) && is_kill) {
set_bit(vlan_id, hdev->vlan_del_fail_bmap);
return -EBUSY;
} else if (!is_kill && test_bit(vlan_id, hdev->vlan_del_fail_bmap)) {
clear_bit(vlan_id, hdev->vlan_del_fail_bmap);
}
hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN,
@ -1233,20 +1235,25 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev)
int ret, sync_cnt = 0;
u16 vlan_id;
if (bitmap_empty(hdev->vlan_del_fail_bmap, VLAN_N_VID))
return;
rtnl_lock();
vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
while (vlan_id != VLAN_N_VID) {
ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q),
vlan_id, true);
if (ret)
return;
break;
clear_bit(vlan_id, hdev->vlan_del_fail_bmap);
sync_cnt++;
if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT)
return;
break;
vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
}
rtnl_unlock();
}
static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
@ -1974,8 +1981,18 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev,
return HCLGEVF_VECTOR0_EVENT_OTHER;
}
static void hclgevf_reset_timer(struct timer_list *t)
{
struct hclgevf_dev *hdev = from_timer(hdev, t, reset_timer);
hclgevf_clear_event_cause(hdev, HCLGEVF_VECTOR0_EVENT_RST);
hclgevf_reset_task_schedule(hdev);
}
static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
{
#define HCLGEVF_RESET_DELAY 5
enum hclgevf_evt_cause event_cause;
struct hclgevf_dev *hdev = data;
u32 clearval;
@ -1987,7 +2004,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
switch (event_cause) {
case HCLGEVF_VECTOR0_EVENT_RST:
hclgevf_reset_task_schedule(hdev);
mod_timer(&hdev->reset_timer,
jiffies + msecs_to_jiffies(HCLGEVF_RESET_DELAY));
break;
case HCLGEVF_VECTOR0_EVENT_MBX:
hclgevf_mbx_handler(hdev);
@ -2930,6 +2948,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
HCLGEVF_DRIVER_NAME);
hclgevf_task_schedule(hdev, round_jiffies_relative(HZ));
timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0);
return 0;

View File

@ -219,6 +219,7 @@ struct hclgevf_dev {
enum hnae3_reset_type reset_level;
unsigned long reset_pending;
enum hnae3_reset_type reset_type;
struct timer_list reset_timer;
#define HCLGEVF_RESET_REQUESTED 0
#define HCLGEVF_RESET_PENDING 1

View File

@ -63,6 +63,9 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
i++;
}
/* ensure additional_info will be seen after received_resp */
smp_rmb();
if (i >= HCLGEVF_MAX_TRY_TIMES) {
dev_err(&hdev->pdev->dev,
"VF could not get mbx(%u,%u) resp(=%d) from PF in %d tries\n",
@ -178,6 +181,10 @@ static void hclgevf_handle_mbx_response(struct hclgevf_dev *hdev,
resp->resp_status = hclgevf_resp_to_errno(resp_status);
memcpy(resp->additional_info, req->msg.resp_data,
HCLGE_MBX_MAX_RESP_DATA_SIZE * sizeof(u8));
/* ensure additional_info will be seen before setting received_resp */
smp_wmb();
if (match_id) {
/* If match_id is not zero, it means PF support match_id.
* if the match_id is right, VF get the right response, or

View File

@ -1479,14 +1479,14 @@ ice_post_dwnld_pkg_actions(struct ice_hw *hw)
}
/**
* ice_download_pkg
* ice_download_pkg_with_sig_seg
* @hw: pointer to the hardware structure
* @pkg_hdr: pointer to package header
*
* Handles the download of a complete package.
*/
static enum ice_ddp_state
ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
ice_download_pkg_with_sig_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
{
enum ice_aq_err aq_err = hw->adminq.sq_last_status;
enum ice_ddp_state state = ICE_DDP_PKG_ERR;
@ -1519,6 +1519,103 @@ ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
state = ice_post_dwnld_pkg_actions(hw);
ice_release_global_cfg_lock(hw);
return state;
}
/**
* ice_dwnld_cfg_bufs
* @hw: pointer to the hardware structure
* @bufs: pointer to an array of buffers
* @count: the number of buffers in the array
*
* Obtains global config lock and downloads the package configuration buffers
* to the firmware.
*/
static enum ice_ddp_state
ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
{
enum ice_ddp_state state;
struct ice_buf_hdr *bh;
int status;
if (!bufs || !count)
return ICE_DDP_PKG_ERR;
/* If the first buffer's first section has its metadata bit set
* then there are no buffers to be downloaded, and the operation is
* considered a success.
*/
bh = (struct ice_buf_hdr *)bufs;
if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF)
return ICE_DDP_PKG_SUCCESS;
status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
if (status) {
if (status == -EALREADY)
return ICE_DDP_PKG_ALREADY_LOADED;
return ice_map_aq_err_to_ddp_state(hw->adminq.sq_last_status);
}
state = ice_dwnld_cfg_bufs_no_lock(hw, bufs, 0, count, true);
if (!state)
state = ice_post_dwnld_pkg_actions(hw);
ice_release_global_cfg_lock(hw);
return state;
}
/**
* ice_download_pkg_without_sig_seg
* @hw: pointer to the hardware structure
* @ice_seg: pointer to the segment of the package to be downloaded
*
* Handles the download of a complete package without signature segment.
*/
static enum ice_ddp_state
ice_download_pkg_without_sig_seg(struct ice_hw *hw, struct ice_seg *ice_seg)
{
struct ice_buf_table *ice_buf_tbl;
ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n",
ice_seg->hdr.seg_format_ver.major,
ice_seg->hdr.seg_format_ver.minor,
ice_seg->hdr.seg_format_ver.update,
ice_seg->hdr.seg_format_ver.draft);
ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n",
le32_to_cpu(ice_seg->hdr.seg_type),
le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id);
ice_buf_tbl = ice_find_buf_table(ice_seg);
ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n",
le32_to_cpu(ice_buf_tbl->buf_count));
return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
le32_to_cpu(ice_buf_tbl->buf_count));
}
/**
* ice_download_pkg
* @hw: pointer to the hardware structure
* @pkg_hdr: pointer to package header
* @ice_seg: pointer to the segment of the package to be downloaded
*
* Handles the download of a complete package.
*/
static enum ice_ddp_state
ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr,
struct ice_seg *ice_seg)
{
enum ice_ddp_state state;
if (hw->pkg_has_signing_seg)
state = ice_download_pkg_with_sig_seg(hw, pkg_hdr);
else
state = ice_download_pkg_without_sig_seg(hw, ice_seg);
ice_post_pkg_dwnld_vlan_mode_cfg(hw);
return state;
@ -2083,7 +2180,7 @@ enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
/* initialize package hints and then download package */
ice_init_pkg_hints(hw, seg);
state = ice_download_pkg(hw, pkg);
state = ice_download_pkg(hw, pkg, seg);
if (state == ICE_DDP_PKG_ALREADY_LOADED) {
ice_debug(hw, ICE_DBG_INIT,
"package previously loaded - no work.\n");

View File

@ -815,12 +815,6 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv,
struct ice_pf *pf = d->pf;
int ret;
if (prio > ICE_DPLL_PRIO_MAX) {
NL_SET_ERR_MSG_FMT(extack, "prio out of supported range 0-%d",
ICE_DPLL_PRIO_MAX);
return -EINVAL;
}
mutex_lock(&pf->dplls.lock);
ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack);
mutex_unlock(&pf->dplls.lock);
@ -1756,6 +1750,7 @@ ice_dpll_init_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu,
}
d->pf = pf;
if (cgu) {
ice_dpll_update_state(pf, d, true);
ret = dpll_device_register(d->dpll, type, &ice_dpll_ops, d);
if (ret) {
dpll_device_put(d->dpll);
@ -1796,8 +1791,6 @@ static int ice_dpll_init_worker(struct ice_pf *pf)
struct ice_dplls *d = &pf->dplls;
struct kthread_worker *kworker;
ice_dpll_update_state(pf, &d->eec, true);
ice_dpll_update_state(pf, &d->pps, true);
kthread_init_delayed_work(&d->work, ice_dpll_periodic_work);
kworker = kthread_create_worker(0, "ice-dplls-%s",
dev_name(ice_pf_to_dev(pf)));
@ -1830,6 +1823,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
int num_pins, i, ret = -EINVAL;
struct ice_hw *hw = &pf->hw;
struct ice_dpll_pin *pins;
unsigned long caps;
u8 freq_supp_num;
bool input;
@ -1849,6 +1843,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
}
for (i = 0; i < num_pins; i++) {
caps = 0;
pins[i].idx = i;
pins[i].prop.board_label = ice_cgu_get_pin_name(hw, i, input);
pins[i].prop.type = ice_cgu_get_pin_type(hw, i, input);
@ -1861,8 +1856,8 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
&dp->input_prio[i]);
if (ret)
return ret;
pins[i].prop.capabilities |=
DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE;
caps |= (DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE |
DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE);
pins[i].prop.phase_range.min =
pf->dplls.input_phase_adj_max;
pins[i].prop.phase_range.max =
@ -1872,9 +1867,11 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
pf->dplls.output_phase_adj_max;
pins[i].prop.phase_range.max =
-pf->dplls.output_phase_adj_max;
ret = ice_cgu_get_output_pin_state_caps(hw, i, &caps);
if (ret)
return ret;
}
pins[i].prop.capabilities |=
DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
pins[i].prop.capabilities = caps;
ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL);
if (ret)
return ret;

View File

@ -6,7 +6,6 @@
#include "ice.h"
#define ICE_DPLL_PRIO_MAX 0xF
#define ICE_DPLL_RCLK_NUM_MAX 4
/** ice_dpll_pin - store info about pins

View File

@ -3961,3 +3961,57 @@ int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num)
return ret;
}
/**
* ice_cgu_get_output_pin_state_caps - get output pin state capabilities
* @hw: pointer to the hw struct
* @pin_id: id of a pin
* @caps: capabilities to modify
*
* Return:
* * 0 - success, state capabilities were modified
* * negative - failure, capabilities were not modified
*/
int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id,
unsigned long *caps)
{
bool can_change = true;
switch (hw->device_id) {
case ICE_DEV_ID_E810C_SFP:
if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3)
can_change = false;
break;
case ICE_DEV_ID_E810C_QSFP:
if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3 || pin_id == ZL_OUT4)
can_change = false;
break;
case ICE_DEV_ID_E823L_10G_BASE_T:
case ICE_DEV_ID_E823L_1GBE:
case ICE_DEV_ID_E823L_BACKPLANE:
case ICE_DEV_ID_E823L_QSFP:
case ICE_DEV_ID_E823L_SFP:
case ICE_DEV_ID_E823C_10G_BASE_T:
case ICE_DEV_ID_E823C_BACKPLANE:
case ICE_DEV_ID_E823C_QSFP:
case ICE_DEV_ID_E823C_SFP:
case ICE_DEV_ID_E823C_SGMII:
if (hw->cgu_part_number ==
ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032 &&
pin_id == ZL_OUT2)
can_change = false;
else if (hw->cgu_part_number ==
ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384 &&
pin_id == SI_OUT1)
can_change = false;
break;
default:
return -EINVAL;
}
if (can_change)
*caps |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
else
*caps &= ~DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
return 0;
}

View File

@ -282,6 +282,8 @@ int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx,
int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num);
void ice_ptp_init_phy_model(struct ice_hw *hw);
int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id,
unsigned long *caps);
#define PFTSYN_SEM_BYTES 4

View File

@ -4790,14 +4790,17 @@ static void mvneta_ethtool_get_strings(struct net_device *netdev, u32 sset,
u8 *data)
{
if (sset == ETH_SS_STATS) {
struct mvneta_port *pp = netdev_priv(netdev);
int i;
for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
memcpy(data + i * ETH_GSTRING_LEN,
mvneta_statistics[i].name, ETH_GSTRING_LEN);
data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics);
page_pool_ethtool_stats_get_strings(data);
if (!pp->bm_priv) {
data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics);
page_pool_ethtool_stats_get_strings(data);
}
}
}
@ -4915,8 +4918,10 @@ static void mvneta_ethtool_pp_stats(struct mvneta_port *pp, u64 *data)
struct page_pool_stats stats = {};
int i;
for (i = 0; i < rxq_number; i++)
page_pool_get_stats(pp->rxqs[i].page_pool, &stats);
for (i = 0; i < rxq_number; i++) {
if (pp->rxqs[i].page_pool)
page_pool_get_stats(pp->rxqs[i].page_pool, &stats);
}
page_pool_ethtool_stats_get(data, &stats);
}
@ -4932,14 +4937,21 @@ static void mvneta_ethtool_get_stats(struct net_device *dev,
for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
*data++ = pp->ethtool_stats[i];
mvneta_ethtool_pp_stats(pp, data);
if (!pp->bm_priv)
mvneta_ethtool_pp_stats(pp, data);
}
static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset)
{
if (sset == ETH_SS_STATS)
return ARRAY_SIZE(mvneta_statistics) +
page_pool_ethtool_stats_get_count();
if (sset == ETH_SS_STATS) {
int count = ARRAY_SIZE(mvneta_statistics);
struct mvneta_port *pp = netdev_priv(dev);
if (!pp->bm_priv)
count += page_pool_ethtool_stats_get_count();
return count;
}
return -EOPNOTSUPP;
}

View File

@ -177,6 +177,8 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq,
static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
struct mlx5_cqe64 *cqe,
u8 *md_buff,
u8 *md_buff_sz,
int budget)
{
struct mlx5e_ptp_port_ts_cqe_list *pending_cqe_list = ptpsq->ts_cqe_pending_list;
@ -211,19 +213,24 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
out:
napi_consume_skb(skb, budget);
mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, metadata_id);
md_buff[*md_buff_sz++] = metadata_id;
if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) &&
!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work);
}
static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq);
struct mlx5_cqwq *cqwq = &cq->wq;
int budget = min(napi_budget, MLX5E_TX_CQ_POLL_BUDGET);
u8 metadata_buff[MLX5E_TX_CQ_POLL_BUDGET];
u8 metadata_buff_sz = 0;
struct mlx5_cqwq *cqwq;
struct mlx5_cqe64 *cqe;
int work_done = 0;
cqwq = &cq->wq;
if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state)))
return false;
@ -234,7 +241,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
do {
mlx5_cqwq_pop(cqwq);
mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget);
mlx5e_ptp_handle_ts_cqe(ptpsq, cqe,
metadata_buff, &metadata_buff_sz, napi_budget);
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
mlx5_cqwq_update_db_record(cqwq);
@ -242,6 +250,10 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
/* ensure cq space is freed before enabling more cqes */
wmb();
while (metadata_buff_sz > 0)
mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist,
metadata_buff[--metadata_buff_sz]);
mlx5e_txqsq_wake(&ptpsq->txqsq);
return work_done == budget;

View File

@ -492,11 +492,11 @@ static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter,
void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
{
char icosq_str[MLX5E_REPORTER_PER_Q_MAX_LEN] = {};
char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
struct mlx5e_icosq *icosq = rq->icosq;
struct mlx5e_priv *priv = rq->priv;
struct mlx5e_err_ctx err_ctx = {};
char icosq_str[32] = {};
err_ctx.ctx = rq;
err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
@ -505,7 +505,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
if (icosq)
snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn);
snprintf(err_str, sizeof(err_str),
"RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x",
"RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x",
rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);
mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);

View File

@ -300,9 +300,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
if (err)
goto destroy_neigh_entry;
e->encap_size = ipv4_encap_size;
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
@ -322,6 +319,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
goto destroy_neigh_entry;
}
e->encap_size = ipv4_encap_size;
e->encap_header = encap_header;
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
mlx5e_route_lookup_ipv4_put(&attr);
@ -404,16 +403,12 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
if (err)
goto free_encap;
e->encap_size = ipv4_encap_size;
kfree(e->encap_header);
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
goto release_neigh;
goto free_encap;
}
memset(&reformat_params, 0, sizeof(reformat_params));
@ -427,6 +422,10 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
goto free_encap;
}
e->encap_size = ipv4_encap_size;
kfree(e->encap_header);
e->encap_header = encap_header;
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
mlx5e_route_lookup_ipv4_put(&attr);
@ -568,9 +567,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
if (err)
goto destroy_neigh_entry;
e->encap_size = ipv6_encap_size;
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
@ -590,6 +586,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
goto destroy_neigh_entry;
}
e->encap_size = ipv6_encap_size;
e->encap_header = encap_header;
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
mlx5e_route_lookup_ipv6_put(&attr);
@ -671,16 +669,12 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
if (err)
goto free_encap;
e->encap_size = ipv6_encap_size;
kfree(e->encap_header);
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
goto release_neigh;
goto free_encap;
}
memset(&reformat_params, 0, sizeof(reformat_params));
@ -694,6 +688,10 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
goto free_encap;
}
e->encap_size = ipv6_encap_size;
kfree(e->encap_header);
e->encap_header = encap_header;
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
mlx5e_route_lookup_ipv6_put(&attr);

View File

@ -43,12 +43,17 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
struct ethtool_drvinfo *drvinfo)
{
struct mlx5_core_dev *mdev = priv->mdev;
int count;
strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d (%.16s)",
fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
mdev->board_id);
count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
if (count == sizeof(drvinfo->fw_version))
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d", fw_rev_maj(mdev),
fw_rev_min(mdev), fw_rev_sub(mdev));
strscpy(drvinfo->bus_info, dev_name(mdev->device),
sizeof(drvinfo->bus_info));
}

View File

@ -71,13 +71,17 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev,
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
int count;
strscpy(drvinfo->driver, mlx5e_rep_driver_name,
sizeof(drvinfo->driver));
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d (%.16s)",
fw_rev_maj(mdev), fw_rev_min(mdev),
fw_rev_sub(mdev), mdev->board_id);
count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
if (count == sizeof(drvinfo->fw_version))
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%04d", fw_rev_maj(mdev),
fw_rev_min(mdev), fw_rev_sub(mdev));
}
static const struct counter_desc sw_rep_stats_desc[] = {

View File

@ -3147,7 +3147,7 @@ static struct mlx5_fields fields[] = {
OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
OFFLOAD(IP_DSCP, 16, 0x0fc0, ip6, 0, ip_dscp),
OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
@ -3158,21 +3158,31 @@ static struct mlx5_fields fields[] = {
OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
};
static unsigned long mask_to_le(unsigned long mask, int size)
static u32 mask_field_get(void *mask, struct mlx5_fields *f)
{
__be32 mask_be32;
__be16 mask_be16;
if (size == 32) {
mask_be32 = (__force __be32)(mask);
mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
} else if (size == 16) {
mask_be32 = (__force __be32)(mask);
mask_be16 = *(__be16 *)&mask_be32;
mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
switch (f->field_bsize) {
case 32:
return be32_to_cpu(*(__be32 *)mask) & f->field_mask;
case 16:
return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask;
default:
return *(u8 *)mask & (u8)f->field_mask;
}
}
return mask;
static void mask_field_clear(void *mask, struct mlx5_fields *f)
{
switch (f->field_bsize) {
case 32:
*(__be32 *)mask &= ~cpu_to_be32(f->field_mask);
break;
case 16:
*(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask);
break;
default:
*(u8 *)mask &= ~(u8)f->field_mask;
break;
}
}
static int offload_pedit_fields(struct mlx5e_priv *priv,
@ -3184,11 +3194,12 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
struct pedit_headers_action *hdrs = parse_attr->hdrs;
void *headers_c, *headers_v, *action, *vals_p;
u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
struct mlx5e_tc_mod_hdr_acts *mod_acts;
unsigned long mask, field_mask;
void *s_masks_p, *a_masks_p;
int i, first, last, next_z;
struct mlx5_fields *f;
unsigned long mask;
u32 s_mask, a_mask;
u8 cmd;
mod_acts = &parse_attr->mod_hdr_acts;
@ -3204,15 +3215,11 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
bool skip;
f = &fields[i];
/* avoid seeing bits set from previous iterations */
s_mask = 0;
a_mask = 0;
s_masks_p = (void *)set_masks + f->offset;
a_masks_p = (void *)add_masks + f->offset;
s_mask = *s_masks_p & f->field_mask;
a_mask = *a_masks_p & f->field_mask;
s_mask = mask_field_get(s_masks_p, f);
a_mask = mask_field_get(a_masks_p, f);
if (!s_mask && !a_mask) /* nothing to offload here */
continue;
@ -3239,22 +3246,20 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
match_mask, f->field_bsize))
skip = true;
/* clear to denote we consumed this field */
*s_masks_p &= ~f->field_mask;
mask_field_clear(s_masks_p, f);
} else {
cmd = MLX5_ACTION_TYPE_ADD;
mask = a_mask;
vals_p = (void *)add_vals + f->offset;
/* add 0 is no change */
if ((*(u32 *)vals_p & f->field_mask) == 0)
if (!mask_field_get(vals_p, f))
skip = true;
/* clear to denote we consumed this field */
*a_masks_p &= ~f->field_mask;
mask_field_clear(a_masks_p, f);
}
if (skip)
continue;
mask = mask_to_le(mask, f->field_bsize);
first = find_first_bit(&mask, f->field_bsize);
next_z = find_next_zero_bit(&mask, f->field_bsize, first);
last = find_last_bit(&mask, f->field_bsize);
@ -3281,10 +3286,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
MLX5_SET(set_action_in, action, field, f->field);
if (cmd == MLX5_ACTION_TYPE_SET) {
unsigned long field_mask = f->field_mask;
int start;
field_mask = mask_to_le(f->field_mask, f->field_bsize);
/* if field is bit sized it can start not from first bit */
start = find_first_bit(&field_mask, f->field_bsize);

View File

@ -399,9 +399,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
mlx5e_skb_cb_hwtstamp_init(skb);
mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index);
mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
metadata_index);
mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index);
if (!netif_tx_queue_stopped(sq->txq) &&
mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) {
netif_tx_stop_queue(sq->txq);
@ -494,10 +494,10 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
err_drop:
stats->dropped++;
dev_kfree_skb_any(skb);
if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
be32_to_cpu(eseg->flow_table_metadata));
dev_kfree_skb_any(skb);
mlx5e_tx_flush(sq);
}

View File

@ -885,11 +885,14 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_irq *irq;
int cpu;
irq = xa_load(&table->comp_irqs, vecidx);
if (!irq)
return;
cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));
cpumask_clear_cpu(cpu, &table->used_cpus);
xa_erase(&table->comp_irqs, vecidx);
mlx5_irq_affinity_irq_release(dev, irq);
}
@ -897,16 +900,26 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx)
static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
struct irq_affinity_desc af_desc = {};
struct mlx5_irq *irq;
irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx);
if (IS_ERR(irq)) {
/* In case SF irq pool does not exist, fallback to the PF irqs*/
if (PTR_ERR(irq) == -ENOENT)
return comp_irq_request_pci(dev, vecidx);
/* In case SF irq pool does not exist, fallback to the PF irqs*/
if (!mlx5_irq_pool_is_sf_pool(pool))
return comp_irq_request_pci(dev, vecidx);
af_desc.is_managed = 1;
cpumask_copy(&af_desc.mask, cpu_online_mask);
cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus);
irq = mlx5_irq_affinity_request(pool, &af_desc);
if (IS_ERR(irq))
return PTR_ERR(irq);
}
cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq));
mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL));
}

View File

@ -984,7 +984,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) {
if (rep->vport == MLX5_VPORT_UPLINK &&
on_esw == from_esw && on_esw->offloads.ft_ipsec_tx_pol) {
dest.ft = on_esw->offloads.ft_ipsec_tx_pol;
flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

View File

@ -168,45 +168,3 @@ void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *i
if (pool->irqs_per_cpu)
cpu_put(pool, cpu);
}
/**
* mlx5_irq_affinity_irq_request_auto - request one IRQ for mlx5 device.
* @dev: mlx5 device that is requesting the IRQ.
* @used_cpus: cpumask of bounded cpus by the device
* @vecidx: vector index to request an IRQ for.
*
* Each IRQ is bounded to at most 1 CPU.
* This function is requesting an IRQ according to the default assignment.
* The default assignment policy is:
* - request the least loaded IRQ which is not bound to any
* CPU of the previous IRQs requested.
*
* On success, this function updates used_cpus mask and returns an irq pointer.
* In case of an error, an appropriate error pointer is returned.
*/
struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
struct cpumask *used_cpus, u16 vecidx)
{
struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
struct irq_affinity_desc af_desc = {};
struct mlx5_irq *irq;
if (!mlx5_irq_pool_is_sf_pool(pool))
return ERR_PTR(-ENOENT);
af_desc.is_managed = 1;
cpumask_copy(&af_desc.mask, cpu_online_mask);
cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus);
irq = mlx5_irq_affinity_request(pool, &af_desc);
if (IS_ERR(irq))
return irq;
cpumask_or(used_cpus, used_cpus, mlx5_irq_get_affinity_mask(irq));
mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
return irq;
}

View File

@ -384,7 +384,12 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta)
{
return mlx5_ptp_adjtime(ptp, delta);
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev;
mdev = container_of(clock, struct mlx5_core_dev, clock);
return mlx5_ptp_adjtime_real_time(mdev, delta);
}
static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm)

View File

@ -28,7 +28,7 @@
struct mlx5_irq {
struct atomic_notifier_head nh;
cpumask_var_t mask;
char name[MLX5_MAX_IRQ_NAME];
char name[MLX5_MAX_IRQ_FORMATTED_NAME];
struct mlx5_irq_pool *pool;
int refcount;
struct msi_map map;
@ -292,8 +292,8 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
else
irq_sf_set_name(pool, name, i);
ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
snprintf(irq->name, MLX5_MAX_IRQ_NAME,
"%s@pci:%s", name, pci_name(dev->pdev));
snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
&irq->nh);
if (err) {

View File

@ -7,6 +7,9 @@
#include <linux/mlx5/driver.h>
#define MLX5_MAX_IRQ_NAME (32)
#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s")
#define MLX5_MAX_IRQ_FORMATTED_NAME \
(MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR))
/* max irq_index is 2047, so four chars */
#define MLX5_MAX_IRQ_IDX_CHARS (4)
#define MLX5_EQ_REFS_PER_IRQ (2)

View File

@ -57,7 +57,8 @@ static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id)
static bool mlx5dr_action_supp_fwd_fdb_multi_ft(struct mlx5_core_dev *dev)
{
return (MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) ||
return (MLX5_CAP_GEN(dev, steering_format_version) < MLX5_STEERING_FORMAT_CONNECTX_6DX ||
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) ||
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table));
}

View File

@ -52,7 +52,6 @@ struct dr_qp_init_attr {
u32 cqn;
u32 pdn;
u32 max_send_wr;
u32 max_send_sge;
struct mlx5_uars_page *uar;
u8 isolate_vl_tc:1;
};
@ -247,37 +246,6 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
return err == CQ_POLL_ERR ? err : npolled;
}
static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr)
{
return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_flow_update_ctrl_seg) +
sizeof(struct mlx5_wqe_header_modify_argument_update_seg));
}
/* We calculate for specific RC QP with the required functionality */
static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr)
{
int update_arg_size;
int inl_size = 0;
int tot_size;
int size;
update_arg_size = dr_qp_get_args_update_send_wqe_size(attr);
size = sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_raddr_seg);
inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) +
DR_STE_SIZE, 16);
size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg);
size = max(size, update_arg_size);
tot_size = max(size, inl_size);
return ALIGN(tot_size, MLX5_SEND_WQE_BB);
}
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
struct dr_qp_init_attr *attr)
{
@ -285,7 +253,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
struct mlx5_wq_param wqp;
struct mlx5dr_qp *dr_qp;
int wqe_size;
int inlen;
void *qpc;
void *in;
@ -365,15 +332,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
if (err)
goto err_in;
dr_qp->uar = attr->uar;
wqe_size = dr_qp_calc_rc_send_wqe(attr);
dr_qp->max_inline_data = min(wqe_size -
(sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_raddr_seg) +
sizeof(struct mlx5_wqe_inline_seg)),
(2 * MLX5_SEND_WQE_BB -
(sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_raddr_seg) +
sizeof(struct mlx5_wqe_inline_seg))));
return dr_qp;
@ -437,48 +395,8 @@ dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
MLX5_SEND_WQE_DS;
}
static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp,
struct dr_data_seg *data_seg, void *wqe)
{
int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_raddr_seg) +
sizeof(struct mlx5_wqe_inline_seg);
struct mlx5_wqe_inline_seg *seg;
int left_space;
int inl = 0;
void *addr;
int len;
int idx;
seg = wqe;
wqe += sizeof(*seg);
addr = (void *)(unsigned long)(data_seg->addr);
len = data_seg->length;
inl += len;
left_space = MLX5_SEND_WQE_BB - inline_header_size;
if (likely(len > left_space)) {
memcpy(wqe, addr, left_space);
len -= left_space;
addr += left_space;
idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1);
wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
}
memcpy(wqe, addr, len);
if (likely(inl)) {
seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
return DIV_ROUND_UP(inl + sizeof(seg->byte_count),
MLX5_SEND_WQE_DS);
} else {
return 0;
}
}
static void
dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
struct mlx5_wqe_ctrl_seg *wq_ctrl,
dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
u64 remote_addr,
u32 rkey,
struct dr_data_seg *data_seg,
@ -494,17 +412,15 @@ dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
wq_raddr->reserved = 0;
wq_dseg = (void *)(wq_raddr + 1);
/* WQE ctrl segment + WQE remote addr segment */
*size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS;
if (data_seg->send_flags & IB_SEND_INLINE) {
*size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg);
} else {
wq_dseg->byte_count = cpu_to_be32(data_seg->length);
wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
wq_dseg->addr = cpu_to_be64(data_seg->addr);
*size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS; /* WQE data segment */
}
wq_dseg->byte_count = cpu_to_be32(data_seg->length);
wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
wq_dseg->addr = cpu_to_be64(data_seg->addr);
*size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */
sizeof(*wq_dseg) + /* WQE data segment */
sizeof(*wq_raddr)) / /* WQE remote addr segment */
MLX5_SEND_WQE_DS;
}
static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
@ -535,7 +451,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
switch (opcode) {
case MLX5_OPCODE_RDMA_READ:
case MLX5_OPCODE_RDMA_WRITE:
dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr,
dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
rkey, data_seg, &size);
break;
case MLX5_OPCODE_FLOW_TBL_ACCESS:
@ -656,7 +572,7 @@ static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->write.send_flags |= IB_SEND_SIGNALED;
else
send_info->write.send_flags &= ~IB_SEND_SIGNALED;
send_info->write.send_flags = 0;
}
static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
@ -680,13 +596,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
}
send_ring->pending_wqe++;
if (!send_info->write.lkey)
send_info->write.send_flags |= IB_SEND_INLINE;
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->write.send_flags |= IB_SEND_SIGNALED;
else
send_info->write.send_flags &= ~IB_SEND_SIGNALED;
send_ring->pending_wqe++;
send_info->read.length = send_info->write.length;
@ -696,9 +608,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
send_info->read.lkey = send_ring->sync_mr->mkey;
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->read.send_flags |= IB_SEND_SIGNALED;
send_info->read.send_flags = IB_SEND_SIGNALED;
else
send_info->read.send_flags &= ~IB_SEND_SIGNALED;
send_info->read.send_flags = 0;
}
static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
@ -1345,7 +1257,6 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
dmn->send_ring->cq->qp = dmn->send_ring->qp;
dmn->info.max_send_wr = QUEUE_SIZE;
init_attr.max_send_sge = 1;
dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
DR_STE_SIZE);

View File

@ -624,6 +624,7 @@ struct rtl8169_private {
unsigned supports_gmii:1;
unsigned aspm_manageable:1;
unsigned dash_enabled:1;
dma_addr_t counters_phys_addr;
struct rtl8169_counters *counters;
struct rtl8169_tc_offsets tc_offset;
@ -1253,14 +1254,26 @@ static bool r8168ep_check_dash(struct rtl8169_private *tp)
return r8168ep_ocp_read(tp, 0x128) & BIT(0);
}
static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp)
static bool rtl_dash_is_enabled(struct rtl8169_private *tp)
{
switch (tp->dash_type) {
case RTL_DASH_DP:
return r8168dp_check_dash(tp);
case RTL_DASH_EP:
return r8168ep_check_dash(tp);
default:
return false;
}
}
static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_28:
case RTL_GIGA_MAC_VER_31:
return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE;
return RTL_DASH_DP;
case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53:
return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE;
return RTL_DASH_EP;
default:
return RTL_DASH_NONE;
}
@ -1453,7 +1466,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
device_set_wakeup_enable(tp_to_dev(tp), wolopts);
if (tp->dash_type == RTL_DASH_NONE) {
if (!tp->dash_enabled) {
rtl_set_d3_pll_down(tp, !wolopts);
tp->dev->wol_enabled = wolopts ? 1 : 0;
}
@ -2512,7 +2525,7 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp)
static void rtl_prepare_power_down(struct rtl8169_private *tp)
{
if (tp->dash_type != RTL_DASH_NONE)
if (tp->dash_enabled)
return;
if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
@ -4648,10 +4661,16 @@ static void rtl8169_down(struct rtl8169_private *tp)
rtl8169_cleanup(tp);
rtl_disable_exit_l1(tp);
rtl_prepare_power_down(tp);
if (tp->dash_type != RTL_DASH_NONE)
rtl8168_driver_stop(tp);
}
static void rtl8169_up(struct rtl8169_private *tp)
{
if (tp->dash_type != RTL_DASH_NONE)
rtl8168_driver_start(tp);
pci_set_master(tp->pci_dev);
phy_init_hw(tp->phydev);
phy_resume(tp->phydev);
@ -4869,7 +4888,7 @@ static int rtl8169_runtime_idle(struct device *device)
{
struct rtl8169_private *tp = dev_get_drvdata(device);
if (tp->dash_type != RTL_DASH_NONE)
if (tp->dash_enabled)
return -EBUSY;
if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev))
@ -4895,8 +4914,7 @@ static void rtl_shutdown(struct pci_dev *pdev)
/* Restore original MAC address */
rtl_rar_set(tp, tp->dev->perm_addr);
if (system_state == SYSTEM_POWER_OFF &&
tp->dash_type == RTL_DASH_NONE) {
if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) {
pci_wake_from_d3(pdev, tp->saved_wolopts);
pci_set_power_state(pdev, PCI_D3hot);
}
@ -5254,7 +5272,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
tp->aspm_manageable = !rc;
tp->dash_type = rtl_check_dash(tp);
tp->dash_type = rtl_get_dash_type(tp);
tp->dash_enabled = rtl_dash_is_enabled(tp);
tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
@ -5325,7 +5344,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* configure chip for default features */
rtl8169_set_features(dev, dev->features);
if (tp->dash_type == RTL_DASH_NONE) {
if (!tp->dash_enabled) {
rtl_set_d3_pll_down(tp, true);
} else {
rtl_set_d3_pll_down(tp, false);
@ -5365,7 +5384,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
"ok" : "ko");
if (tp->dash_type != RTL_DASH_NONE) {
netdev_info(dev, "DASH enabled\n");
netdev_info(dev, "DASH %s\n",
tp->dash_enabled ? "enabled" : "disabled");
rtl8168_driver_start(tp);
}

View File

@ -5293,6 +5293,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
dma_dir = page_pool_get_dma_dir(rx_q->page_pool);
buf_sz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit);
if (netif_msg_rx_status(priv)) {
void *rx_head;
@ -5328,10 +5329,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
len = 0;
}
read_again:
if (count >= limit)
break;
read_again:
buf1_len = 0;
buf2_len = 0;
entry = next_entry;

View File

@ -2063,7 +2063,7 @@ static int prueth_probe(struct platform_device *pdev)
&prueth->shram);
if (ret) {
dev_err(dev, "unable to get PRUSS SHRD RAM2: %d\n", ret);
pruss_put(prueth->pruss);
goto put_pruss;
}
prueth->sram_pool = of_gen_pool_get(np, "sram", 0);
@ -2105,10 +2105,7 @@ static int prueth_probe(struct platform_device *pdev)
prueth->iep1 = icss_iep_get_idx(np, 1);
if (IS_ERR(prueth->iep1)) {
ret = dev_err_probe(dev, PTR_ERR(prueth->iep1), "iep1 get failed\n");
icss_iep_put(prueth->iep0);
prueth->iep0 = NULL;
prueth->iep1 = NULL;
goto free_pool;
goto put_iep0;
}
if (prueth->pdata.quirk_10m_link_issue) {
@ -2205,6 +2202,12 @@ netdev_exit:
exit_iep:
if (prueth->pdata.quirk_10m_link_issue)
icss_iep_exit_fw(prueth->iep1);
icss_iep_put(prueth->iep1);
put_iep0:
icss_iep_put(prueth->iep0);
prueth->iep0 = NULL;
prueth->iep1 = NULL;
free_pool:
gen_pool_free(prueth->sram_pool,
@ -2212,6 +2215,8 @@ free_pool:
put_mem:
pruss_release_mem_region(prueth->pruss, &prueth->shram);
put_pruss:
pruss_put(prueth->pruss);
put_cores:

View File

@ -411,7 +411,7 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
return addr;
}
static int ipvlan_process_v4_outbound(struct sk_buff *skb)
static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
{
const struct iphdr *ip4h = ip_hdr(skb);
struct net_device *dev = skb->dev;
@ -453,13 +453,11 @@ out:
}
#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
static noinline_for_stack int
ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
struct dst_entry *dst;
int err, ret = NET_XMIT_DROP;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.daddr = ip6h->daddr,
@ -469,27 +467,38 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
.flowi6_mark = skb->mark,
.flowi6_proto = ip6h->nexthdr,
};
struct dst_entry *dst;
int err;
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
ret = dst->error;
dst = ip6_route_output(dev_net(dev), NULL, &fl6);
err = dst->error;
if (err) {
dst_release(dst);
goto err;
return err;
}
skb_dst_set(skb, dst);
return 0;
}
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
int err, ret = NET_XMIT_DROP;
err = ipvlan_route_v6_outbound(dev, skb);
if (unlikely(err)) {
DEV_STATS_INC(dev, tx_errors);
kfree_skb(skb);
return err;
}
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
err = ip6_local_out(net, skb->sk, skb);
err = ip6_local_out(dev_net(dev), skb->sk, skb);
if (unlikely(net_xmit_eval(err)))
DEV_STATS_INC(dev, tx_errors);
else
ret = NET_XMIT_SUCCESS;
goto out;
err:
DEV_STATS_INC(dev, tx_errors);
kfree_skb(skb);
out:
return ret;
}
#else

View File

@ -780,7 +780,7 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change)
if (dev->flags & IFF_UP) {
if (change & IFF_ALLMULTI)
dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1);
if (change & IFF_PROMISC)
if (!macvlan_passthru(vlan->port) && change & IFF_PROMISC)
dev_set_promiscuity(lowerdev,
dev->flags & IFF_PROMISC ? 1 : -1);

View File

@ -453,6 +453,10 @@ ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg)
case PPPIOCSMRU:
if (get_user(val, (int __user *) argp))
break;
if (val > U16_MAX) {
err = -EINVAL;
break;
}
if (val < PPP_MRU)
val = PPP_MRU;
ap->mru = val;
@ -687,7 +691,7 @@ ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count)
/* strip address/control field if present */
p = skb->data;
if (p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
/* chop off address/control */
if (skb->len < 3)
goto err;

View File

@ -572,7 +572,8 @@ ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags,
for (i = 0; i < cnt; i++) {
event[i] = queue->buf[queue->head];
queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
/* Paired with READ_ONCE() in queue_cnt() */
WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
}
spin_unlock_irqrestore(&queue->lock, flags);

View File

@ -57,10 +57,11 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
dst->t.sec = seconds;
dst->t.nsec = remainder;
/* Both WRITE_ONCE() are paired with READ_ONCE() in queue_cnt() */
if (!queue_free(queue))
queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS;
WRITE_ONCE(queue->tail, (queue->tail + 1) % PTP_MAX_TIMESTAMPS);
spin_unlock_irqrestore(&queue->lock, flags);
}

View File

@ -85,9 +85,13 @@ struct ptp_vclock {
* that a writer might concurrently increment the tail does not
* matter, since the queue remains nonempty nonetheless.
*/
static inline int queue_cnt(struct timestamp_event_queue *q)
static inline int queue_cnt(const struct timestamp_event_queue *q)
{
int cnt = q->tail - q->head;
/*
* Paired with WRITE_ONCE() in enqueue_external_timestamp(),
* ptp_read(), extts_fifo_show().
*/
int cnt = READ_ONCE(q->tail) - READ_ONCE(q->head);
return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt;
}

View File

@ -94,7 +94,8 @@ static ssize_t extts_fifo_show(struct device *dev,
qcnt = queue_cnt(queue);
if (qcnt) {
event = queue->buf[queue->head];
queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
/* Paired with READ_ONCE() in queue_cnt() */
WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
}
spin_unlock_irqrestore(&queue->lock, flags);

View File

@ -437,7 +437,7 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
if (blk->shared_backend) {
blk->buffer = shared_buffer;
} else {
blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
GFP_KERNEL);
if (!blk->buffer) {
ret = -ENOMEM;
@ -495,7 +495,7 @@ static int __init vdpasim_blk_init(void)
goto parent_err;
if (shared_backend) {
shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
GFP_KERNEL);
if (!shared_buffer) {
ret = -ENOMEM;

View File

@ -1582,7 +1582,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
err:
put_device(&v->dev);
ida_simple_remove(&vhost_vdpa_ida, v->minor);
return r;
}

View File

@ -242,7 +242,7 @@ void vp_del_vqs(struct virtio_device *vdev)
if (v != VIRTIO_MSI_NO_VECTOR) {
int irq = pci_irq_vector(vp_dev->pci_dev, v);
irq_set_affinity_hint(irq, NULL);
irq_update_affinity_hint(irq, NULL);
free_irq(irq, vq);
}
}
@ -443,10 +443,10 @@ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
mask = vp_dev->msix_affinity_masks[info->msix_vector];
irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
if (!cpu_mask)
irq_set_affinity_hint(irq, NULL);
irq_update_affinity_hint(irq, NULL);
else {
cpumask_copy(mask, cpu_mask);
irq_set_affinity_hint(irq, mask);
irq_set_affinity_and_hint(irq, mask);
}
}
return 0;

View File

@ -294,9 +294,10 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
err = -EINVAL;
mdev->common = vp_modern_map_capability(mdev, common,
sizeof(struct virtio_pci_common_cfg), 4,
0, sizeof(struct virtio_pci_modern_common_cfg),
&mdev->common_len, NULL);
sizeof(struct virtio_pci_common_cfg), 4, 0,
offsetofend(struct virtio_pci_modern_common_cfg,
queue_reset),
&mdev->common_len, NULL);
if (!mdev->common)
goto err_map_common;
mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,

View File

@ -171,11 +171,11 @@ static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
int i;
struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
evtchn_port_t evtchn;
/* Timer interrupt has highest priority. */
irq = irq_from_virq(cpu, VIRQ_TIMER);
irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn);
if (irq != -1) {
evtchn_port_t evtchn = evtchn_from_irq(irq);
word_idx = evtchn / BITS_PER_LONG;
bit_idx = evtchn % BITS_PER_LONG;
if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx))
@ -328,9 +328,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
if (sync_test_bit(i, BM(sh->evtchn_pending))) {
int word_idx = i / BITS_PER_EVTCHN_WORD;
printk(" %d: event %d -> irq %d%s%s%s\n",
printk(" %d: event %d -> irq %u%s%s%s\n",
cpu_from_evtchn(i), i,
get_evtchn_to_irq(i),
irq_from_evtchn(i),
sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
? "" : " l2-clear",
!sync_test_bit(i, BM(sh->evtchn_mask))

File diff suppressed because it is too large Load Diff

View File

@ -33,7 +33,6 @@ struct evtchn_ops {
extern const struct evtchn_ops *evtchn_ops;
int get_evtchn_to_irq(evtchn_port_t evtchn);
void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
unsigned int cpu_from_evtchn(evtchn_port_t evtchn);

View File

@ -47,6 +47,9 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#ifdef CONFIG_ACPI
#include <acpi/processor.h>
#endif
/*
* @cpu_id: Xen physical cpu logic number
@ -400,4 +403,23 @@ bool __init xen_processor_present(uint32_t acpi_id)
return online;
}
void xen_sanitize_proc_cap_bits(uint32_t *cap)
{
struct xen_platform_op op = {
.cmd = XENPF_set_processor_pminfo,
.u.set_pminfo.id = -1,
.u.set_pminfo.type = XEN_PM_PDC,
};
u32 buf[3] = { ACPI_PDC_REVISION_ID, 1, *cap };
int ret;
set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
ret = HYPERVISOR_platform_op(&op);
if (ret)
pr_err("sanitize of _PDC buffer bits from Xen failed: %d\n",
ret);
else
*cap = buf[2];
}
#endif

View File

@ -21,7 +21,7 @@
#include <xen/xen-front-pgdir-shbuf.h>
/**
/*
* This structure represents the structure of a shared page
* that contains grant references to the pages of the shared
* buffer. This structure is common to many Xen para-virtualized
@ -33,7 +33,7 @@ struct xen_page_directory {
grant_ref_t gref[]; /* Variable length */
};
/**
/*
* Shared buffer ops which are differently implemented
* depending on the allocation mode, e.g. if the buffer
* is allocated by the corresponding backend or frontend.
@ -61,7 +61,7 @@ struct xen_front_pgdir_shbuf_ops {
int (*unmap)(struct xen_front_pgdir_shbuf *buf);
};
/**
/*
* Get granted reference to the very first page of the
* page directory. Usually this is passed to the backend,
* so it can find/fill the grant references to the buffer's
@ -81,7 +81,7 @@ xen_front_pgdir_shbuf_get_dir_start(struct xen_front_pgdir_shbuf *buf)
}
EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_get_dir_start);
/**
/*
* Map granted references of the shared buffer.
*
* Depending on the shared buffer mode of allocation
@ -102,7 +102,7 @@ int xen_front_pgdir_shbuf_map(struct xen_front_pgdir_shbuf *buf)
}
EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_map);
/**
/*
* Unmap granted references of the shared buffer.
*
* Depending on the shared buffer mode of allocation
@ -123,7 +123,7 @@ int xen_front_pgdir_shbuf_unmap(struct xen_front_pgdir_shbuf *buf)
}
EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_unmap);
/**
/*
* Free all the resources of the shared buffer.
*
* \param buf shared buffer which resources to be freed.
@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_free);
offsetof(struct xen_page_directory, \
gref)) / sizeof(grant_ref_t))
/**
/*
* Get the number of pages the page directory consumes itself.
*
* \param buf shared buffer.
@ -160,7 +160,7 @@ static int get_num_pages_dir(struct xen_front_pgdir_shbuf *buf)
return DIV_ROUND_UP(buf->num_pages, XEN_NUM_GREFS_PER_PAGE);
}
/**
/*
* Calculate the number of grant references needed to share the buffer
* and its pages when backend allocates the buffer.
*
@ -172,7 +172,7 @@ static void backend_calc_num_grefs(struct xen_front_pgdir_shbuf *buf)
buf->num_grefs = get_num_pages_dir(buf);
}
/**
/*
* Calculate the number of grant references needed to share the buffer
* and its pages when frontend allocates the buffer.
*
@ -190,7 +190,7 @@ static void guest_calc_num_grefs(struct xen_front_pgdir_shbuf *buf)
#define xen_page_to_vaddr(page) \
((uintptr_t)pfn_to_kaddr(page_to_xen_pfn(page)))
/**
/*
* Unmap the buffer previously mapped with grant references
* provided by the backend.
*
@ -238,7 +238,7 @@ static int backend_unmap(struct xen_front_pgdir_shbuf *buf)
return ret;
}
/**
/*
* Map the buffer with grant references provided by the backend.
*
* \param buf shared buffer.
@ -320,7 +320,7 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf)
return ret;
}
/**
/*
* Fill page directory with grant references to the pages of the
* page directory itself.
*
@ -350,7 +350,7 @@ static void backend_fill_page_dir(struct xen_front_pgdir_shbuf *buf)
page_dir->gref_dir_next_page = XEN_GREF_LIST_END;
}
/**
/*
* Fill page directory with grant references to the pages of the
* page directory and the buffer we share with the backend.
*
@ -389,7 +389,7 @@ static void guest_fill_page_dir(struct xen_front_pgdir_shbuf *buf)
}
}
/**
/*
* Grant references to the frontend's buffer pages.
*
* These will be shared with the backend, so it can
@ -418,7 +418,7 @@ static int guest_grant_refs_for_buffer(struct xen_front_pgdir_shbuf *buf,
return 0;
}
/**
/*
* Grant all the references needed to share the buffer.
*
* Grant references to the page directory pages and, if
@ -466,7 +466,7 @@ static int grant_references(struct xen_front_pgdir_shbuf *buf)
return 0;
}
/**
/*
* Allocate all required structures to mange shared buffer.
*
* \param buf shared buffer.
@ -506,7 +506,7 @@ static const struct xen_front_pgdir_shbuf_ops local_ops = {
.grant_refs_for_buffer = guest_grant_refs_for_buffer,
};
/**
/*
* Allocate a new instance of a shared buffer.
*
* \param cfg configuration to be used while allocating a new shared buffer.

View File

@ -56,7 +56,7 @@ extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
extern bool bpf_global_ma_set;
typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@ -909,10 +909,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
aux->ctx_field_size = size;
}
static bool bpf_is_ldimm64(const struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
insn->src_reg == BPF_PSEUDO_FUNC;
return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
struct bpf_prog_ops {

View File

@ -420,7 +420,7 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising)
* A function that translates value of following registers to the linkmode:
* IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20)
* IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60)
* IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61)
* IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61)
*/
static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val)
{

View File

@ -5,13 +5,6 @@
#include <linux/pci.h>
#include <linux/virtio_pci.h>
struct virtio_pci_modern_common_cfg {
struct virtio_pci_common_cfg cfg;
__le16 queue_notify_data; /* read-write */
__le16 queue_reset; /* read-write */
};
/**
* struct virtio_pci_modern_device - info for modern PCI virtio
* @pci_dev: Ptr to the PCI device struct

View File

@ -178,9 +178,9 @@ static inline __be32 nft_reg_load_be32(const u32 *sreg)
return *(__force __be32 *)sreg;
}
static inline void nft_reg_store64(u32 *dreg, u64 val)
static inline void nft_reg_store64(u64 *dreg, u64 val)
{
put_unaligned(val, (u64 *)dreg);
put_unaligned(val, dreg);
}
static inline u64 nft_reg_load64(const u32 *sreg)

View File

@ -58,6 +58,11 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
return to_ct_params(a)->nf_ft;
}
static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
{
return to_ct_params(a)->helper;
}
#else
static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; }
static inline int tcf_ct_action(const struct tc_action *a) { return 0; }
@ -65,6 +70,10 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
{
return NULL;
}
static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
{
return NULL;
}
#endif /* CONFIG_NF_CONNTRACK */
#if IS_ENABLED(CONFIG_NET_ACT_CT)

View File

@ -166,6 +166,17 @@ struct virtio_pci_common_cfg {
__le32 queue_used_hi; /* read-write */
};
/*
* Warning: do not use sizeof on this: use offsetofend for
* specific fields you need.
*/
struct virtio_pci_modern_common_cfg {
struct virtio_pci_common_cfg cfg;
__le16 queue_notify_data; /* read-write */
__le16 queue_reset; /* read-write */
};
/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */
struct virtio_pci_cfg_cap {
struct virtio_pci_cap cap;

View File

@ -88,7 +88,6 @@ void xen_irq_resume(void);
/* Clear an irq's pending state, in preparation for polling on it */
void xen_clear_irq_pending(int irq);
void xen_set_irq_pending(int irq);
bool xen_test_irq_pending(int irq);
/* Poll waiting for an irq to become pending. In the usual case, the
@ -101,8 +100,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout);
/* Determine the IRQ which is bound to an event channel */
unsigned int irq_from_evtchn(evtchn_port_t evtchn);
int irq_from_virq(unsigned int cpu, unsigned int virq);
evtchn_port_t evtchn_from_irq(unsigned irq);
int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
evtchn_port_t *evtchn);
int xen_set_callback_via(uint64_t via);
int xen_evtchn_do_upcall(void);
@ -122,9 +121,6 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
/* De-allocates the above mentioned physical interrupt. */
int xen_destroy_irq(int irq);
/* Return irq from pirq */
int xen_irq_from_pirq(unsigned pirq);
/* Return the pirq allocated to the irq. */
int xen_pirq_from_irq(unsigned irq);

View File

@ -64,8 +64,8 @@
#define OFF insn->off
#define IMM insn->imm
struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
bool bpf_global_ma_set, bpf_global_percpu_ma_set;
struct bpf_mem_alloc bpf_global_ma;
bool bpf_global_ma_set;
/* No hurry in this branch
*
@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void)
ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
bpf_global_ma_set = !ret;
ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
bpf_global_percpu_ma_set = !ret;
return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
return ret;
}
late_initcall(bpf_global_ma_init);
#endif

View File

@ -26,6 +26,7 @@
#include <linux/poison.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/bpf_mem_alloc.h>
#include <net/xdp.h>
#include "disasm.h"
@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#undef BPF_LINK_TYPE
};
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
/* bpf_check() is a static code analyzer that walks eBPF program
* instruction by instruction and updates register/stack state.
* All paths of conditional branches are analyzed until 'bpf_exit' insn.
@ -336,6 +340,7 @@ struct bpf_kfunc_call_arg_meta {
struct btf *btf_vmlinux;
static DEFINE_MUTEX(bpf_verifier_lock);
static DEFINE_MUTEX(bpf_percpu_ma_lock);
static const struct bpf_line_info *
find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
@ -3516,12 +3521,29 @@ static int push_jmp_history(struct bpf_verifier_env *env,
/* Backtrack one insn at a time. If idx is not at the top of recorded
* history then previous instruction came from straight line execution.
* Return -ENOENT if we exhausted all instructions within given state.
*
* It's legal to have a bit of a looping with the same starting and ending
* insn index within the same state, e.g.: 3->4->5->3, so just because current
* instruction index is the same as state's first_idx doesn't mean we are
* done. If there is still some jump history left, we should keep going. We
* need to take into account that we might have a jump history between given
* state's parent and itself, due to checkpointing. In this case, we'll have
* history entry recording a jump from last instruction of parent state and
* first instruction of given state.
*/
static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
u32 *history)
{
u32 cnt = *history;
if (i == st->first_insn_idx) {
if (cnt == 0)
return -ENOENT;
if (cnt == 1 && st->jmp_history[0].idx == i)
return -ENOENT;
}
if (cnt && st->jmp_history[cnt - 1].idx == i) {
i = st->jmp_history[cnt - 1].prev_idx;
(*history)--;
@ -4401,10 +4423,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
* Nothing to be tracked further in the parent state.
*/
return 0;
if (i == first_idx)
break;
subseq_idx = i;
i = get_prev_insn_idx(st, i, &history);
if (i == -ENOENT)
break;
if (i >= env->prog->len) {
/* This can happen if backtracking reached insn 0
* and there are still reg_mask or stack_mask
@ -12074,8 +12096,19 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
return -ENOMEM;
if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
return -ENOMEM;
if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
if (!bpf_global_percpu_ma_set) {
mutex_lock(&bpf_percpu_ma_lock);
if (!bpf_global_percpu_ma_set) {
err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
if (!err)
bpf_global_percpu_ma_set = true;
}
mutex_unlock(&bpf_percpu_ma_lock);
if (err)
return err;
}
}
if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
@ -15386,8 +15419,7 @@ enum {
* w - next instruction
* e - edge
*/
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
bool loop_ok)
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
{
int *insn_stack = env->cfg.insn_stack;
int *insn_state = env->cfg.insn_state;
@ -15419,7 +15451,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
insn_stack[env->cfg.cur_stack++] = w;
return KEEP_EXPLORING;
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
if (loop_ok && env->bpf_capable)
if (env->bpf_capable)
return DONE_EXPLORING;
verbose_linfo(env, t, "%d: ", t);
verbose_linfo(env, w, "%d: ", w);
@ -15439,24 +15471,20 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
struct bpf_verifier_env *env,
bool visit_callee)
{
int ret;
int ret, insn_sz;
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
if (ret)
return ret;
mark_prune_point(env, t + 1);
mark_prune_point(env, t + insn_sz);
/* when we exit from subprog, we need to record non-linear history */
mark_jmp_point(env, t + 1);
mark_jmp_point(env, t + insn_sz);
if (visit_callee) {
mark_prune_point(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
/* It's ok to allow recursion from CFG point of
* view. __check_func_call() will do the actual
* check.
*/
bpf_pseudo_func(insns + t));
ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
}
return ret;
}
@ -15469,15 +15497,17 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
static int visit_insn(int t, struct bpf_verifier_env *env)
{
struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
int ret, off;
int ret, off, insn_sz;
if (bpf_pseudo_func(insn))
return visit_func_call_insn(t, insns, env, true);
/* All non-branch instructions have a single fall-through edge. */
if (BPF_CLASS(insn->code) != BPF_JMP &&
BPF_CLASS(insn->code) != BPF_JMP32)
return push_insn(t, t + 1, FALLTHROUGH, env, false);
BPF_CLASS(insn->code) != BPF_JMP32) {
insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
return push_insn(t, t + insn_sz, FALLTHROUGH, env);
}
switch (BPF_OP(insn->code)) {
case BPF_EXIT:
@ -15523,8 +15553,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
off = insn->imm;
/* unconditional jump with single edge */
ret = push_insn(t, t + off + 1, FALLTHROUGH, env,
true);
ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
if (ret)
return ret;
@ -15537,11 +15566,11 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
/* conditional jump with two edges */
mark_prune_point(env, t);
ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
ret = push_insn(t, t + 1, FALLTHROUGH, env);
if (ret)
return ret;
return push_insn(t, t + insn->off + 1, BRANCH, env, true);
return push_insn(t, t + insn->off + 1, BRANCH, env);
}
}
@ -15607,11 +15636,21 @@ walk_cfg:
}
for (i = 0; i < insn_cnt; i++) {
struct bpf_insn *insn = &env->prog->insnsi[i];
if (insn_state[i] != EXPLORED) {
verbose(env, "unreachable insn %d\n", i);
ret = -EINVAL;
goto err_free;
}
if (bpf_is_ldimm64(insn)) {
if (insn_state[i + 1] != 0) {
verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
ret = -EINVAL;
goto err_free;
}
i++; /* skip second half of ldimm64 */
}
}
ret = 0; /* cfg looks good */

View File

@ -37,7 +37,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
struct iphdr *iph;
int err;
int err = 0;
/* for offloaded checksums cleanup checksum before fragmentation */
if (skb->ip_summed == CHECKSUM_PARTIAL &&

View File

@ -1119,7 +1119,9 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res)
if (i == max_netdevices)
return -ENFILE;
snprintf(res, IFNAMSIZ, name, i);
/* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */
strscpy(buf, name, IFNAMSIZ);
snprintf(res, IFNAMSIZ, buf, i);
return i;
}

View File

@ -180,18 +180,17 @@ static void gso_test_func(struct kunit *test)
}
if (tcase->frag_skbs) {
unsigned int total_size = 0, total_true_size = 0, alloc_size = 0;
unsigned int total_size = 0, total_true_size = 0;
struct sk_buff *frag_skb, *prev = NULL;
page = alloc_page(GFP_KERNEL);
KUNIT_ASSERT_NOT_NULL(test, page);
page_ref_add(page, tcase->nr_frag_skbs - 1);
for (i = 0; i < tcase->nr_frag_skbs; i++) {
unsigned int frag_size;
page = alloc_page(GFP_KERNEL);
KUNIT_ASSERT_NOT_NULL(test, page);
frag_size = tcase->frag_skbs[i];
frag_skb = build_skb(page_address(page) + alloc_size,
frag_skb = build_skb(page_address(page),
frag_size + shinfo_size);
KUNIT_ASSERT_NOT_NULL(test, frag_skb);
__skb_put(frag_skb, frag_size);
@ -204,11 +203,8 @@ static void gso_test_func(struct kunit *test)
total_size += frag_size;
total_true_size += frag_skb->truesize;
alloc_size += frag_size + shinfo_size;
}
KUNIT_ASSERT_LE(test, alloc_size, PAGE_SIZE);
skb->len += total_size;
skb->data_len += total_size;
skb->truesize += total_true_size;

View File

@ -751,12 +751,12 @@ int __inet_hash(struct sock *sk, struct sock *osk)
if (err)
goto unlock;
}
sock_set_flag(sk, SOCK_RCU_FREE);
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
sk->sk_family == AF_INET6)
__sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
else
__sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
spin_unlock(&ilb2->lock);

View File

@ -1515,8 +1515,9 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, rm_list, list) {
remove_anno_list_by_saddr(msk, &entry->addr);
if (alist.nr < MPTCP_RM_IDS_MAX)
if ((remove_anno_list_by_saddr(msk, &entry->addr) ||
lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) &&
alist.nr < MPTCP_RM_IDS_MAX)
alist.ids[alist.nr++] = entry->addr.id;
}

View File

@ -1230,6 +1230,8 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
mptcp_do_fallback(ssk);
}
#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@ -1256,6 +1258,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
return -EAGAIN;
/* compute send limit */
if (unlikely(ssk->sk_gso_max_size > MPTCP_MAX_GSO_SIZE))
ssk->sk_gso_max_size = MPTCP_MAX_GSO_SIZE;
info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
copy = info->size_goal;
@ -3398,10 +3402,11 @@ static void mptcp_release_cb(struct sock *sk)
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
if (unlikely(msk->cb_flags)) {
/* be sure to set the current sk state before tacking actions
* depending on sk_state, that is processing MPTCP_ERROR_REPORT
/* be sure to set the current sk state before taking actions
* depending on sk_state (MPTCP_ERROR_REPORT)
* On sk release avoid actions depending on the first subflow
*/
if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first)
__mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);

View File

@ -738,8 +738,11 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
val = READ_ONCE(inet_sk(sk)->tos);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow;
slow = lock_sock_fast(ssk);
__ip_sock_set_tos(ssk, val);
unlock_sock_fast(ssk, slow);
}
release_sock(sk);

View File

@ -89,11 +89,6 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
if ((had_link == has_link) || chained)
return 0;
if (had_link)
netif_carrier_off(ndp->ndev.dev);
else
netif_carrier_on(ndp->ndev.dev);
if (!ndp->multi_package && !nc->package->multi_channel) {
if (had_link) {
ndp->flags |= NCSI_DEV_RESHUFFLE;

View File

@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
ip_set_dereference((inst)->ip_set_list)[id]
#define ip_set_ref_netlink(inst,id) \
rcu_dereference_raw((inst)->ip_set_list)[id]
#define ip_set_dereference_nfnl(p) \
rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set)
static struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
struct ip_set *set;
struct ip_set_net *inst = ip_set_pernet(net);
rcu_read_lock();
/* ip_set_list itself needs to be protected */
set = rcu_dereference(inst->ip_set_list)[index];
rcu_read_unlock();
return set;
/* ip_set_list and the set pointer need to be protected */
return ip_set_dereference_nfnl(inst->ip_set_list)[index];
}
static inline void
@ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
/* Make sure all readers of the old set pointers are completed. */
synchronize_rcu();
return 0;
}

View File

@ -7263,10 +7263,11 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
if (err < 0) {
NL_SET_BAD_ATTR(extack, attr);
break;
return err;
}
}
return err;
return 0;
}
/*
@ -9679,16 +9680,14 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
call_rcu(&trans->rcu, nft_trans_gc_trans_free);
}
static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
unsigned int gc_seq,
bool sync)
struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
unsigned int gc_seq)
{
struct nft_set_elem_catchall *catchall, *next;
struct nft_set_elem_catchall *catchall;
const struct nft_set *set = gc->set;
struct nft_elem_priv *elem_priv;
struct nft_set_ext *ext;
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_expired(ext))
@ -9698,35 +9697,42 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
nft_set_elem_dead(ext);
dead_elem:
if (sync)
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
else
gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
if (!gc)
return NULL;
elem_priv = catchall->elem;
if (sync) {
nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
nft_setelem_catchall_destroy(catchall);
}
nft_trans_gc_elem_add(gc, elem_priv);
nft_trans_gc_elem_add(gc, catchall->elem);
}
return gc;
}
struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
unsigned int gc_seq)
{
return nft_trans_gc_catchall(gc, gc_seq, false);
}
struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
{
return nft_trans_gc_catchall(gc, 0, true);
struct nft_set_elem_catchall *catchall, *next;
const struct nft_set *set = gc->set;
struct nft_elem_priv *elem_priv;
struct nft_set_ext *ext;
WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net));
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_expired(ext))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
return NULL;
elem_priv = catchall->elem;
nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
nft_setelem_catchall_destroy(catchall);
nft_trans_gc_elem_add(gc, elem_priv);
}
return gc;
}
static void nf_tables_module_autoload_cleanup(struct net *net)

View File

@ -38,13 +38,14 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->size) {
case 8: {
u64 *dst64 = (void *)dst;
u64 src64;
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 8; i++) {
src64 = nft_reg_load64(&src[i]);
nft_reg_store64(&dst[i],
nft_reg_store64(&dst64[i],
be64_to_cpu((__force __be64)src64));
}
break;
@ -52,7 +53,7 @@ void nft_byteorder_eval(const struct nft_expr *expr,
for (i = 0; i < priv->len / 8; i++) {
src64 = (__force __u64)
cpu_to_be64(nft_reg_load64(&src[i]));
nft_reg_store64(&dst[i], src64);
nft_reg_store64(&dst64[i], src64);
}
break;
}

View File

@ -63,7 +63,7 @@ nft_meta_get_eval_time(enum nft_meta_keys key,
{
switch (key) {
case NFT_META_TIME_NS:
nft_reg_store64(dest, ktime_get_real_ns());
nft_reg_store64((u64 *)dest, ktime_get_real_ns());
break;
case NFT_META_TIME_DAY:
nft_reg_store8(dest, nft_meta_weekday());

View File

@ -624,14 +624,12 @@ static void nft_rbtree_gc(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
struct nftables_pernet *nft_net;
struct rb_node *node, *next;
struct nft_trans_gc *gc;
struct net *net;
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
nft_net = nft_pernet(net);
gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (!gc)

View File

@ -1549,6 +1549,9 @@ static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
if (bind) {
struct flow_action_entry *entry = entry_data;
if (tcf_ct_helper(act))
return -EOPNOTSUPP;
entry->id = FLOW_ACTION_CT;
entry->ct.action = tcf_ct_action(act);
entry->ct.zone = tcf_ct_zone(act);

View File

@ -102,6 +102,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len)
return -EMSGSIZE;
skb_put(skb, TLV_SPACE(len));
memset(tlv, 0, TLV_SPACE(len));
tlv->tlv_type = htons(type);
tlv->tlv_len = htons(TLV_LENGTH(len));
if (len && data)

View File

@ -2581,15 +2581,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK))
WRITE_ONCE(u->oob_skb, NULL);
else
skb_get(oob_skb);
unix_state_unlock(sk);
chunk = state->recv_actor(oob_skb, 0, chunk, state);
if (!(state->flags & MSG_PEEK)) {
if (!(state->flags & MSG_PEEK))
UNIXCB(oob_skb).consumed += 1;
kfree_skb(oob_skb);
}
consume_skb(oob_skb);
mutex_unlock(&u->iolock);

View File

@ -97,4 +97,66 @@ l0_%=: r2 = r0; \
" ::: __clobber_all);
}
SEC("socket")
__description("conditional loop (2)")
__success
__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11")
__naked void conditional_loop2(void)
{
asm volatile (" \
r9 = 2 ll; \
r3 = 0x20 ll; \
r4 = 0x35 ll; \
r8 = r4; \
goto l1_%=; \
l0_%=: r9 -= r3; \
r9 -= r4; \
r9 -= r8; \
l1_%=: r8 += r4; \
if r8 < 0x64 goto l0_%=; \
r0 = r9; \
exit; \
" ::: __clobber_all);
}
SEC("socket")
__description("unconditional loop after conditional jump")
__failure __msg("infinite loop detected")
__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2")
__naked void uncond_loop_after_cond_jmp(void)
{
asm volatile (" \
r0 = 0; \
if r0 > 0 goto l1_%=; \
l0_%=: r0 = 1; \
goto l0_%=; \
l1_%=: exit; \
" ::: __clobber_all);
}
__naked __noinline __used
static unsigned long never_ending_subprog()
{
asm volatile (" \
r0 = r1; \
goto -1; \
" ::: __clobber_all);
}
SEC("socket")
__description("unconditional loop after conditional jump")
/* infinite loop is detected *after* check_cfg() */
__failure __msg("infinite loop detected")
__naked void uncond_loop_in_subprog_after_cond_jmp(void)
{
asm volatile (" \
r0 = 0; \
if r0 > 0 goto l1_%=; \
l0_%=: r0 += 1; \
call never_ending_subprog; \
l1_%=: exit; \
" ::: __clobber_all);
}
char _license[] SEC("license") = "GPL";

View File

@ -75,9 +75,10 @@ l0_%=: r0 += 1; \
" ::: __clobber_all);
}
SEC("tracepoint")
SEC("socket")
__description("bounded loop, start in the middle")
__failure __msg("back-edge")
__success
__failure_unpriv __msg_unpriv("back-edge")
__naked void loop_start_in_the_middle(void)
{
asm volatile (" \
@ -136,7 +137,9 @@ l0_%=: exit; \
SEC("tracepoint")
__description("bounded recursion")
__failure __msg("back-edge")
__failure
/* verifier limitation in detecting max stack depth */
__msg("the call stack of 8 frames is too deep !")
__naked void bounded_recursion(void)
{
asm volatile (" \

View File

@ -91,3 +91,43 @@ __naked int bpf_end_bswap(void)
}
#endif /* v4 instruction */
SEC("?raw_tp")
__success __log_level(2)
/*
* Without the bug fix there will be no history between "last_idx 3 first_idx 3"
* and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
* expected log messages to the one specific mark_chain_precision operation.
*
* This is quite fragile: if verifier checkpointing heuristic changes, this
* might need adjusting.
*/
__msg("2: (07) r0 += 1 ; R0_w=6")
__msg("3: (35) if r0 >= 0xa goto pc+1")
__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4")
__msg("3: R0_w=6")
__naked int state_loop_first_last_equal(void)
{
asm volatile (
"r0 = 0;"
"l0_%=:"
"r0 += 1;"
"r0 += 1;"
/* every few iterations we'll have a checkpoint here with
* first_idx == last_idx, potentially confusing precision
* backtracking logic
*/
"if r0 >= 10 goto l1_%=;" /* checkpoint + mark_precise */
"goto l0_%=;"
"l1_%=:"
"exit;"
::: __clobber_common
);
}
char _license[] SEC("license") = "GPL";

View File

@ -442,7 +442,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
.errstr = "back-edge from insn 0 to 0",
.errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@ -799,7 +799,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
.errstr = "back-edge",
.errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@ -811,7 +811,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
.errstr = "back-edge",
.errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{

View File

@ -9,8 +9,8 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
.errstr = "invalid BPF_LD_IMM insn",
.errstr_unpriv = "R1 pointer comparison",
.errstr = "jump into the middle of ldimm64 insn 1",
.errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{
@ -23,8 +23,8 @@
BPF_LD_IMM64(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.errstr = "invalid BPF_LD_IMM insn",
.errstr_unpriv = "R1 pointer comparison",
.errstr = "jump into the middle of ldimm64 insn 1",
.errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{

View File

@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
struct xdp_info *meta = data - sizeof(struct xdp_info);
if (meta->count != pkt->pkt_nb) {
ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
__func__, pkt->pkt_nb, meta->count);
ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
__func__, pkt->pkt_nb,
(unsigned long long)meta->count);
return false;
}
@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp
if (addr >= umem->num_frames * umem->frame_size ||
addr + len > umem->num_frames * umem->frame_size) {
ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len);
ksft_print_msg("Frag invalid addr: %llx len: %u\n",
(unsigned long long)addr, len);
return false;
}
if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len);
ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
(unsigned long long)addr, len);
return false;
}
@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
ksft_print_msg("[%s] Too many packets completed\n", __func__);
ksft_print_msg("Last completion address: %llx\n", addr);
ksft_print_msg("Last completion address: %llx\n",
(unsigned long long)addr);
return TEST_FAILURE;
}
@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject)
}
if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
__func__, stats.tx_invalid_descs,
ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
__func__,
(unsigned long long)stats.tx_invalid_descs,
ifobject->xsk->pkt_stream->nb_pkts);
return TEST_FAILURE;
}

View File

@ -3240,7 +3240,7 @@ fastclose_tests()
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_join_nr 0 0 0 0 0 0 1
chk_fclose_nr 1 1 invert
chk_rst_nr 1 1
fi