1
0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2026-01-18 04:20:44 +00:00
Vadim Fedorenko ed3d74a754 selftests: net-drv: stats: sanity check FEC histogram
Simple tests to validate the kernel's output. Each FEC bin range must be
valid, meaning the high boundary must not be less than the low boundary.
Bin boundaries have to be provided, as well as the error counter value.
The sum of the per-lane values should match the bin's value.

Signed-off-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Link: https://patch.msgid.link/20250924124037.1508846-6-vadim.fedorenko@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-09-26 16:49:29 -07:00

321 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
"""
Tests related to standard netdevice statistics.
"""
import errno
import subprocess
import time
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
from lib.py import KsftSkipEx, KsftFailEx
from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
from lib.py import cmd, ip, defer
# Netlink family handles shared by the test cases in this file.
ethnl = EthtoolFamily()
netfam = NetdevFamily()
rtnl = RtnlFamily()
def check_pause(cfg) -> None:
    """
    Verify that a device exposing Pause configuration also exposes the
    standard pause statistics.

    The test is skipped when the plain pause query fails with EOPNOTSUPP;
    any other netlink error is propagated unchanged.
    """
    # Probe without stats first, purely to find out whether pause is supported.
    try:
        ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
    except NlError as err:
        if err.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("pause not supported by the device") from err

    stats_request = {
        "header": {
            "dev-index": cfg.ifindex,
            "flags": {'stats'},
        },
    }
    reply = ethnl.pause_get(stats_request)
    ksft_true(reply['stats'], "driver does not report stats")
def check_fec(cfg) -> None:
    """
    Verify that a device exposing FEC configuration also exposes the
    standard FEC statistics.

    The test is skipped when the plain FEC query fails with EOPNOTSUPP;
    any other netlink error is propagated unchanged.
    """
    # Probe without stats first, purely to find out whether FEC is supported.
    try:
        ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
    except NlError as err:
        if err.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("FEC not supported by the device") from err

    stats_request = {
        "header": {
            "dev-index": cfg.ifindex,
            "flags": {'stats'},
        },
    }
    reply = ethnl.fec_get(stats_request)
    ksft_true(reply['stats'], "driver does not report stats")
def check_fec_hist(cfg) -> None:
    """
    Check that drivers which support FEC histogram statistics report
    reasonable values.

    The test is skipped when the FEC query fails with EOPNOTSUPP, or when
    the reply carries no 'stats' or no 'hist' entry. For every histogram bin:
      - 'bin-low', 'bin-high' and 'bin-val' must all be present,
      - 'bin-high' must not be lower than 'bin-low',
      - when 'bin-val-per-lane' is reported, its sum must equal 'bin-val'.
    """
    try:
        data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
                                         "flags": {'stats'}}})
    except NlError as e:
        if e.error == errno.EOPNOTSUPP:
            raise KsftSkipEx("FEC not supported by the device") from e
        raise
    if 'stats' not in data:
        raise KsftSkipEx("FEC stats not supported by the device")
    if 'hist' not in data['stats']:
        raise KsftSkipEx("FEC histogram not supported by the device")

    required = ('bin-low', 'bin-high', 'bin-val')
    for fec_bin in data['stats']['hist']:
        for key in required:
            ksft_in(key, fec_bin,
                    "Drivers should always report FEC bin range and value")
        if any(key not in fec_bin for key in required):
            # The missing key has been flagged by ksft_in() above; do not
            # let the value checks below die with a bare KeyError, move on
            # to the next bin instead.
            continue

        ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'],
                "FEC bin range should be valid")
        if 'bin-val-per-lane' in fec_bin:
            ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'],
                    "FEC bin value should be equal to sum of per-lane values")
def pkt_byte_sum(cfg) -> None:
    """
    Cross-check the qstats of the device against its rtnl link stats.

    The test is skipped when the qstats dump has no entry for the device.
    Both sources must report the basic packet/byte counters, and samples
    taken alternately from the two sources must never go backwards.
    """
    counters = ('tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes')

    def get_qstat(test):
        # Pick the entry of the device under test out of the full dump.
        dump = netfam.qstats_get({}, dump=True)
        if not dump:
            return None
        return next((qs for qs in dump if qs["ifindex"] == test.ifindex),
                    None)

    def stat_cmp(rstat, qstat):
        # Difference (rtnl - qstat) of the first counter that differs,
        # 0 when all of them agree.
        for name in counters:
            delta = rstat[name] - qstat[name]
            if delta != 0:
                return delta
        return 0

    qstat = get_qstat(cfg)
    if qstat is None:
        raise KsftSkipEx("qstats not supported by the device")

    for name in counters:
        ksft_in(name, qstat, "Drivers should always report basic keys")

    # The interface may be passing traffic, so exact equality cannot be
    # expected. Alternate between the two sources instead: whichever
    # sample was fetched later must be higher than or equal to the
    # previous one.
    for _ in range(10):
        rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
        if stat_cmp(rtstat, qstat) < 0:
            raise KsftFailEx("RTNL stats are lower, fetched later")

        qstat = get_qstat(cfg)
        if stat_cmp(rtstat, qstat) > 0:
            raise KsftFailEx("Qstats are lower, fetched later")
def qstat_by_ifindex(cfg) -> None:
    """
    Qstats Netlink API tests - querying by ifindex.

    Covers three areas:
      - per-ifindex queries must be consistent with full dumps taken
        before and after them (counters never go backwards),
      - queue-scope dumps must list every queue id exactly once, with no
        gaps, per device and queue type,
      - invalid ifindex values must be rejected with the expected errno
        and an extack pointing at the '.ifindex' attribute.

    The test is skipped when no device shows up in the qstats dump.
    """
    # Construct a map ifindex -> [dump, by-index, dump]
    ifindexes = {}
    stats = netfam.qstats_get({}, dump=True)
    for entry in stats:
        ifindexes[entry['ifindex']] = [entry, None, None]

    for ifindex in ifindexes:
        entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
        ksft_eq(len(entry), 1)
        ifindexes[entry[0]['ifindex']][1] = entry[0]

    stats = netfam.qstats_get({}, dump=True)
    for entry in stats:
        ifindexes[entry['ifindex']][2] = entry

    if not ifindexes:
        raise KsftSkipEx("No ifindex supports qstats")

    # Now make sure the stats match/make sense
    for ifindex, triple in ifindexes.items():
        all_keys = triple[0].keys() | triple[1].keys() | triple[2].keys()

        for key in all_keys:
            ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key)
            ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key)

    # Sanity check the dumps
    queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True)
    # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}}
    parsed = {}
    for entry in queues:
        per_dev = parsed.setdefault(entry["ifindex"], {"rx": [], "tx": []})
        per_dev[entry["queue-type"]].append(entry['queue-id'])

    # Now, validate
    for ifindex, dev_queues in parsed.items():
        for qtype in ['rx', 'tx']:
            ids = dev_queues[qtype]
            ksft_eq(len(ids), len(set(ids)),
                    comment="repeated queue keys")
            # default=-1 keeps a device which reported no queue of this
            # type from blowing up max(); 0 queues then matches 0 ids.
            ksft_eq(len(ids), max(ids, default=-1) + 1,
                    comment="missing queue keys")

    # Test invalid dumps
    # 0 is invalid
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 0}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ERANGE)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # loopback has no stats
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 1}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # Try to get stats for lowest unused ifindex but not 0
    devs = rtnl.getlink({}, dump=True)
    all_ifindexes = {dev["ifi-index"] for dev in devs}
    lowest = 2
    while lowest in all_ifindexes:
        lowest += 1

    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": lowest}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
@ksft_disruptive
def check_down(cfg) -> None:
    """
    Make sure qstats do not go backwards when the interface is brought down.

    The test is skipped when the qstats query fails with EOPNOTSUPP. The
    interface is set down for the comparison and a deferred action brings
    it back up.
    """
    try:
        before = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
    except NlError as err:
        if err.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("qstats not supported by the device") from err

    ip(f"link set dev {cfg.dev['ifname']} down")
    defer(ip, f"link set dev {cfg.dev['ifname']} up")

    after = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
    for name, old_value in before.items():
        ksft_ge(after[name], old_value,
                comment=f"{name} went backwards on device down")

    # Also poke the per-queue scope while the device is down; the reply is
    # not inspected, the point is that the query must be handled correctly
    # and not crash.
    netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
def __run_inf_loop(body):
    """
    Start a shell which runs @body in an endless `while true` loop.

    Surrounding whitespace is stripped from @body and a terminating ';' is
    appended when it is missing. Returns the subprocess.Popen object of the
    shell; its stdout and stderr are connected to pipes.
    """
    command = body.strip()
    terminator = "" if command[-1] == ';' else ";"
    script = f"while true; do {command}{terminator} done"

    return subprocess.Popen(script, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
def __stats_increase_sanely(old, new) -> None:
    """
    Compare two stats snapshots key by key.

    Every key of @old must be present in @new with a value which is not
    lower, and which grew by less than 2^31 (a larger jump is reported as
    a likely wrapping error).
    """
    for name, before in old.items():
        after = new[name]
        ksft_ge(after, before)
        ksft_lt(after - before, 1 << 31, comment="likely wrapping error")
def procfs_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, which is not an exclusive
    lock, make sure drivers can handle parallel reads of stats.

    Two background shell loops keep reading /proc/net/dev while the rtnl
    stats of the device are sampled twice, two seconds apart; the samples
    must increase sanely. Deferred actions kill the loops.
    """
    # __run_inf_loop() connects the loop's stdout to a pipe which nobody
    # drains here. Discard the output in the shell, otherwise the readers
    # block as soon as the pipe buffer fills up and stop hammering procfs
    # (while still looking alive to poll()).
    reader = "cat /proc/net/dev > /dev/null"

    one = __run_inf_loop(reader)
    defer(one.kill)
    two = __run_inf_loop(reader)
    defer(two.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(one.poll(), None)
    ksft_is(two.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    time.sleep(2)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)
    # defers will kill the loops
@ksft_disruptive
def procfs_downup_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, drivers often try
    to sleep when reading the stats, or don't protect against races.

    One background loop keeps reading /proc/net/dev while another one
    cycles the interface down/up and flips the queue count between 1 and
    the maximum. The rtnl stats sampled around a 9 second window must
    increase sanely. Deferred actions restore the original queue count,
    bring the link back up and kill the loops.
    """
    # Max out the queues, we'll flip between max and 1
    channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    # Use .get() so a reply without the combined attributes selects the
    # 'rx' channels instead of raising KeyError.
    # NOTE(review): presumably drivers without combined channels omit the
    # attribute altogether - confirm against the ethtool netlink spec.
    if channels.get('combined-count', 0) == 0:
        rx_type = 'rx'
    else:
        rx_type = 'combined'
    cur_queue_cnt = channels[f'{rx_type}-count']
    max_queue_cnt = channels[f'{rx_type}-max']

    cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
    defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")

    # Real test stats
    # The loop's stdout is a pipe which is never drained for this process,
    # discard the output in the shell so that the reader does not block
    # once the pipe buffer is full.
    stats = __run_inf_loop("cat /proc/net/dev > /dev/null")
    defer(stats.kill)

    ipset = f"ip link set dev {cfg.ifname}"
    # The up/down loop may get killed while the link is down
    defer(ip, f"link set dev {cfg.ifname} up")
    # The "echo -n 1" lets us count iterations below
    updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \
             f"ethtool -L {cfg.ifname} {rx_type} 1; " + \
             f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \
             "echo -n 1"
    updown = __run_inf_loop(updown)
    kill_updown = defer(updown.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(stats.poll(), None)
    ksft_is(updown.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    # We're looking for crashes, give it extra time
    time.sleep(9)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)

    # Stop the up/down loop now, then collect the "1"s it printed, one
    # per completed iteration.
    kill_updown.exec()
    stdout, _ = updown.communicate(timeout=5)
    ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))
def main() -> None:
    """ Ksft boiler plate main: run every test case against one NetDrvEnv """
    cases = [
        check_pause,
        check_fec,
        check_fec_hist,
        pkt_byte_sum,
        qstat_by_ifindex,
        check_down,
        procfs_hammer,
        procfs_downup_hammer,
    ]

    with NetDrvEnv(__file__, queue_count=100) as cfg:
        ksft_run(cases, args=(cfg, ))
    ksft_exit()


if __name__ == "__main__":
    main()