

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:

 - Fix several syzkaller found bugs:
    - Poor parsing of the RDMA_NL_LS_OP_IP_RESOLVE netlink message
    - GID entry refcount leaking when CM destruction races with
      multicast establishment
    - Missing refcount put in ib_del_sub_device_and_put()

 - Fixup recently introduced uABI padding for 32 bit consistency

 - Avoid user triggered math overflow in MANA and EFA

 - Reading invalid netdev data during an event

 - kdoc fixes

 - Fix never-working gid copying in ib_get_gids_from_rdma_hdr

 - Typo in bnxt when validating the BAR

 - bnxt mis-parsed IB_SEND_IP_CSUM so it didn't always work

 - Out of bounds access in bnxt related to the counters on new devices

 - Allocate the bnxt PDE table with the right sizing

 - Use dma_free_coherent() correctly in bnxt

 - Allow rxe to be unloadable when CONFIG_PROVE_LOCKING by adjusting the
   tracking of the global sockets it uses

 - Missing unlocking on error path in rxe

 - Compute the right number of pages in a MR in rtrs

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/bnxt_re: fix dma_free_coherent() pointer
  RDMA/rtrs: Fix clt_path::max_pages_per_mr calculation
  IB/rxe: Fix missing umem_odp->umem_mutex unlock on error path
  RDMA/bnxt_re: Fix to use correct page size for PDE table
  RDMA/bnxt_re: Fix OOB write in bnxt_re_copy_err_stats()
  RDMA/bnxt_re: Fix IB_SEND_IP_CSUM handling in post_send
  RDMA/core: always drop device refcount in ib_del_sub_device_and_put()
  RDMA/rxe: let rxe_reclassify_recv_socket() call sk_owner_put()
  RDMA/bnxt_re: Fix incorrect BAR check in bnxt_qplib_map_creq_db()
  RDMA/core: Fix logic error in ib_get_gids_from_rdma_hdr()
  RDMA/efa: Remove possible negative shift
  RTRS/rtrs: clean up rtrs headers kernel-doc
  RDMA/irdma: avoid invalid read in irdma_net_event
  RDMA/mana_ib: check cqe length for kernel CQs
  RDMA/irdma: Fix irdma_alloc_ucontext_resp padding
  RDMA/ucma: Fix rdma_ucm_query_ib_service_resp struct padding
  RDMA/cm: Fix leaking the multicast GID table reference
  RDMA/core: Check for the presence of LS_NLA_TYPE_DGID correctly
commit 6ce4d44fb0 by Linus Torvalds, 2026-01-02 12:25:47 -08:00
18 changed files with 107 additions and 68 deletions


@@ -80,37 +80,25 @@ static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
	 .min = sizeof(struct rdma_nla_ls_gid)},
 };
 
-static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
+static void ib_nl_process_ip_rsep(const struct nlmsghdr *nlh)
 {
	struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
+	union ib_gid gid;
+	struct addr_req *req;
+	int found = 0;
	int ret;
 
	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
-		return false;
+		return;
 
	ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
				   nlmsg_len(nlh), ib_nl_addr_policy, NULL);
	if (ret)
-		return false;
-
-	return true;
-}
-
-static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
-{
-	const struct nlattr *head, *curr;
-	union ib_gid gid;
-	struct addr_req *req;
-	int len, rem;
-	int found = 0;
-
-	head = (const struct nlattr *)nlmsg_data(nlh);
-	len = nlmsg_len(nlh);
-
-	nla_for_each_attr(curr, head, len, rem) {
-		if (curr->nla_type == LS_NLA_TYPE_DGID)
-			memcpy(&gid, nla_data(curr), nla_len(curr));
-	}
+		return;
+
+	if (!tb[LS_NLA_TYPE_DGID])
+		return;
+
+	memcpy(&gid, nla_data(tb[LS_NLA_TYPE_DGID]), sizeof(gid));
 
	spin_lock_bh(&lock);
	list_for_each_entry(req, &req_list, list) {
@@ -137,8 +125,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
	    !(NETLINK_CB(skb).sk))
		return -EPERM;
 
-	if (ib_nl_is_good_ip_resp(nlh))
-		ib_nl_process_good_ip_rsep(nlh);
+	ib_nl_process_ip_rsep(nlh);
 
	return 0;
 }
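Worth spelling out, since this is the syzkaller-reported parsing bug: nla_parse_deprecated() succeeding only means the attributes that were present validated against the policy; an absent LS_NLA_TYPE_DGID leaves its tb[] slot NULL, and the old loop could then use an uninitialized gid. A minimal sketch of the parse-then-check-presence pattern (demo_get_dgid is a hypothetical helper, not code from the patch):

#include <net/netlink.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_netlink.h>

static int demo_get_dgid(const struct nlmsghdr *nlh,
			 const struct nla_policy *policy, union ib_gid *gid)
{
	struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
	int ret;

	ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
				   nlmsg_len(nlh), policy, NULL);
	if (ret)
		return ret;

	/* parsing succeeded, but the attribute may simply be missing */
	if (!tb[LS_NLA_TYPE_DGID])
		return -EINVAL;

	memcpy(gid, nla_data(tb[LS_NLA_TYPE_DGID]), sizeof(*gid));
	return 0;
}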


@@ -2009,6 +2009,7 @@ static void destroy_mc(struct rdma_id_private *id_priv,
	ib_sa_free_multicast(mc->sa_mc);
 
	if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+		struct rdma_cm_event *event = &mc->iboe_join.event;
		struct rdma_dev_addr *dev_addr =
			&id_priv->id.route.addr.dev_addr;
		struct net_device *ndev = NULL;
@@ -2031,6 +2032,8 @@ static void destroy_mc(struct rdma_id_private *id_priv,
			dev_put(ndev);
 
		cancel_work_sync(&mc->iboe_join.work);
+		if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN)
+			rdma_destroy_ah_attr(&event->param.ud.ah_attr);
	}
	kfree(mc);
 }


@@ -2881,8 +2881,10 @@ int ib_del_sub_device_and_put(struct ib_device *sub)
 {
	struct ib_device *parent = sub->parent;
 
-	if (!parent)
+	if (!parent) {
+		ib_device_put(sub);
		return -EOPNOTSUPP;
+	}
 
	mutex_lock(&parent->subdev_lock);
	list_del(&sub->subdev_list);
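The _and_put() suffix is the contract being repaired here: such helpers consume the caller's reference on every return path, including early errors. A generic sketch of the convention with a hypothetical kref-counted object (demo_obj and its helpers are illustrative only):

#include <linux/kref.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct demo_obj {
	struct kref ref;
};

static void demo_obj_release(struct kref *ref)
{
	kfree(container_of(ref, struct demo_obj, ref));
}

static int demo_del_obj_and_put(struct demo_obj *obj, bool deletable)
{
	if (!deletable) {
		/* still consume the caller's reference, or it leaks */
		kref_put(&obj->ref, demo_obj_release);
		return -EOPNOTSUPP;
	}

	/* ... tear the object down ... */
	kref_put(&obj->ref, demo_obj_release);
	return 0;
}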


@@ -738,7 +738,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
				       (struct in6_addr *)dgid);
		return 0;
	} else if (net_type == RDMA_NETWORK_IPV6 ||
-		   net_type == RDMA_NETWORK_IB || RDMA_NETWORK_ROCE_V1) {
+		   net_type == RDMA_NETWORK_IB || net_type == RDMA_NETWORK_ROCE_V1) {
		*dgid = hdr->ibgrh.dgid;
		*sgid = hdr->ibgrh.sgid;
		return 0;
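The one-character logic error above is an always-true condition: the final operand tested the nonzero constant RDMA_NETWORK_ROCE_V1 itself instead of comparing net_type against it, so this branch swallowed every header type. A standalone illustration (the demo enum values are chosen for the example, not taken from kernel headers):

#include <assert.h>

enum demo_net_type { DEMO_IB, DEMO_ROCE_V1, DEMO_IPV4, DEMO_IPV6 };

static int takes_grh_branch_buggy(enum demo_net_type t)
{
	/* DEMO_ROCE_V1 is a nonzero constant: true for every t */
	return t == DEMO_IPV6 || t == DEMO_IB || DEMO_ROCE_V1;
}

static int takes_grh_branch_fixed(enum demo_net_type t)
{
	return t == DEMO_IPV6 || t == DEMO_IB || t == DEMO_ROCE_V1;
}

int main(void)
{
	assert(takes_grh_branch_buggy(DEMO_IPV4));	/* wrongly true */
	assert(!takes_grh_branch_fixed(DEMO_IPV4));	/* correctly false */
	return 0;
}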


@@ -89,6 +89,9 @@ enum bnxt_re_hw_stats {
	BNXT_RE_RES_SRQ_LOAD_ERR,
	BNXT_RE_RES_TX_PCI_ERR,
	BNXT_RE_RES_RX_PCI_ERR,
+	BNXT_RE_REQ_CQE_ERROR,
+	BNXT_RE_RESP_CQE_ERROR,
+	BNXT_RE_RESP_REMOTE_ACCESS_ERRS,
	BNXT_RE_OUT_OF_SEQ_ERR,
	BNXT_RE_TX_ATOMIC_REQ,
	BNXT_RE_TX_READ_REQ,
@@ -110,9 +113,6 @@ enum bnxt_re_hw_stats {
	BNXT_RE_TX_CNP,
	BNXT_RE_RX_CNP,
	BNXT_RE_RX_ECN,
-	BNXT_RE_REQ_CQE_ERROR,
-	BNXT_RE_RESP_CQE_ERROR,
-	BNXT_RE_RESP_REMOTE_ACCESS_ERRS,
	BNXT_RE_NUM_EXT_COUNTERS
 };


@@ -2919,14 +2919,9 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
				wqe.rawqp1.lflags |=
					SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC;
			}
-			switch (wr->send_flags) {
-			case IB_SEND_IP_CSUM:
-				wqe.rawqp1.lflags |=
-					SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM;
-				break;
-			default:
-				break;
-			}
+			if (wr->send_flags & IB_SEND_IP_CSUM)
+				wqe.rawqp1.lflags |=
+					SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM;
			fallthrough;
		case IB_WR_SEND_WITH_INV:
			rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
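send_flags is a bitmask, which is why the switch never fired in practice: switch (wr->send_flags) matches only when IB_SEND_IP_CSUM is the sole flag set, so a typical IB_SEND_SIGNALED | IB_SEND_IP_CSUM combination fell through to default and checksum offload was silently skipped. A standalone demonstration (the flag values mimic enum ib_send_flags for the demo):

#include <assert.h>

#define DEMO_SEND_SIGNALED	(1 << 1)
#define DEMO_SEND_IP_CSUM	(1 << 4)

static int csum_via_switch(unsigned int flags)
{
	switch (flags) {
	case DEMO_SEND_IP_CSUM:		/* exact-value match only */
		return 1;
	default:
		return 0;
	}
}

static int csum_via_mask(unsigned int flags)
{
	return !!(flags & DEMO_SEND_IP_CSUM);	/* any combination */
}

int main(void)
{
	unsigned int flags = DEMO_SEND_SIGNALED | DEMO_SEND_IP_CSUM;

	assert(csum_via_switch(flags) == 0);	/* bug: offload skipped */
	assert(csum_via_mask(flags) == 1);	/* fix: offload applied */
	return 0;
}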


@@ -1112,7 +1112,7 @@ static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt)
	creq_db->dbinfo.flags = 0;
	creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION;
	creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
-	if (!creq_db->reg.bar_id)
+	if (!creq_db->reg.bar_base)
		dev_err(&pdev->dev,
			"QPLIB: CREQ BAR region %d resc start is 0!",
			creq_db->reg.bar_id);


@@ -64,9 +64,7 @@ static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl,
	for (i = 0; i < pbl->pg_count; i++) {
		if (pbl->pg_arr[i])
			dma_free_coherent(&pdev->dev, pbl->pg_size,
-					  (void *)((unsigned long)
-						   pbl->pg_arr[i] &
-						   PAGE_MASK),
+					  pbl->pg_arr[i],
					  pbl->pg_map_arr[i]);
		else
			dev_warn(&pdev->dev,
@@ -237,7 +235,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
		if (npbl % BIT(MAX_PDL_LVL_SHIFT))
			npde++;
		/* Alloc PDE pages */
-		sginfo.pgsize = npde * pg_size;
+		sginfo.pgsize = npde * ROCE_PG_SIZE_4K;
		sginfo.npages = 1;
		rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo);
		if (rc)
@@ -245,7 +243,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
		/* Alloc PBL pages */
		sginfo.npages = npbl;
-		sginfo.pgsize = PAGE_SIZE;
+		sginfo.pgsize = ROCE_PG_SIZE_4K;
		rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_1], &sginfo);
		if (rc)
			goto fail;
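The first hunk restores the dma_free_coherent() contract: the CPU address and DMA handle passed in must be exactly the pair dma_alloc_coherent() returned, and masking the pointer with PAGE_MASK hands the allocator an address it never gave out whenever the buffer is not page-aligned. A minimal kernel-style sketch of the contract (demo_buf and both helpers are hypothetical):

#include <linux/dma-mapping.h>

struct demo_buf {
	void *cpu;
	dma_addr_t dma;
	size_t size;
};

static int demo_buf_alloc(struct device *dev, struct demo_buf *buf,
			  size_t size)
{
	buf->size = size;
	buf->cpu = dma_alloc_coherent(dev, size, &buf->dma, GFP_KERNEL);
	return buf->cpu ? 0 : -ENOMEM;
}

static void demo_buf_free(struct device *dev, struct demo_buf *buf)
{
	/*
	 * Pass buf->cpu back untouched. Rounding it, e.g.
	 * (void *)((unsigned long)buf->cpu & PAGE_MASK), frees an
	 * address the allocator never returned.
	 */
	dma_free_coherent(dev, buf->size, buf->cpu, buf->dma);
}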


@@ -1320,13 +1320,9 @@ static int umem_to_page_list(struct efa_dev *dev,
			     u32 hp_cnt,
			     u8 hp_shift)
 {
-	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
	struct ib_block_iter biter;
	unsigned int hp_idx = 0;
 
-	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
-		  hp_cnt, pages_in_hp);
-
	rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
		page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);


@@ -251,7 +251,7 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
		    void *ptr)
 {
	struct neighbour *neigh = ptr;
-	struct net_device *real_dev, *netdev = (struct net_device *)neigh->dev;
+	struct net_device *real_dev, *netdev;
	struct irdma_device *iwdev;
	struct ib_device *ibdev;
	__be32 *p;
@@ -260,6 +260,7 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
+		netdev = neigh->dev;
		real_dev = rdma_vlan_dev_real_dev(netdev);
		if (!real_dev)
			real_dev = netdev;


@@ -56,6 +56,10 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		doorbell = mana_ucontext->doorbell;
	} else {
		is_rnic_cq = true;
+		if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
+			ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+			return -EINVAL;
+		}
		buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
		cq->cqe = buf_size / COMP_ENTRY_SIZE;
		err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
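The new check rejects the request before the 32-bit multiply can wrap: attr->cqe is user-controlled and buf_size is derived from attr->cqe * COMP_ENTRY_SIZE. A standalone illustration of the arithmetic, with the 64-byte completion entry size assumed for the demo:

#include <assert.h>
#include <stdint.h>

#define DEMO_COMP_ENTRY_SIZE 64u	/* assumed entry size */

int main(void)
{
	uint32_t cqe = 1u << 26;	/* user-supplied CQE count */

	/* 2^26 * 2^6 == 2^32, which wraps to 0 in 32-bit arithmetic */
	assert(cqe * DEMO_COMP_ENTRY_SIZE == 0);

	/* the guard from the fix fires before the multiply happens */
	assert(cqe > UINT32_MAX / DEMO_COMP_ENTRY_SIZE / 2 + 1);
	return 0;
}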


@@ -64,7 +64,39 @@ static inline void rxe_reclassify_recv_socket(struct socket *sock)
		break;
	default:
		WARN_ON_ONCE(1);
+		return;
	}
+
+	/*
+	 * sock_lock_init_class_and_name() calls
+	 * sk_owner_set(sk, THIS_MODULE); in order
+	 * to make sure the referenced global
+	 * variables rxe_recv_slock_key and
+	 * rxe_recv_sk_key are not removed
+	 * before the socket is closed.
+	 *
+	 * However this prevents rxe_net_exit()
+	 * from being called and 'rmmod rdma_rxe'
+	 * is refused because of the references.
+	 *
+	 * For the global sockets in recv_sockets,
+	 * we are sure that rxe_net_exit() will call
+	 * rxe_release_udp_tunnel -> udp_tunnel_sock_release.
+	 *
+	 * So we don't need the additional reference to
+	 * our own (THIS_MODULE).
+	 */
+	sk_owner_put(sk);
+
+	/*
+	 * We also call sk_owner_clear() otherwise
+	 * sk_owner_put(sk) in sk_prot_free will
+	 * fail, which is called via
+	 * sk_free -> __sk_free -> sk_destruct
+	 * and sk_destruct calls __sk_destruct
+	 * directly or via call_rcu()
+	 * so sk_prot_free() might be called
+	 * after rxe_net_exit().
+	 */
+	sk_owner_clear(sk);
 #endif /* CONFIG_DEBUG_LOCK_ALLOC */
 }


@@ -179,9 +179,11 @@ static int rxe_odp_map_range_and_lock(struct rxe_mr *mr, u64 iova, int length, u
			return err;
 
		need_fault = rxe_check_pagefault(umem_odp, iova, length);
-		if (need_fault)
+		if (need_fault) {
+			mutex_unlock(&umem_odp->umem_mutex);
			return -EFAULT;
+		}
	}
	return 0;
 }
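The bug was an early return that kept umem_odp->umem_mutex held. The usual kernel idiom funnels every exit through one unlock label so no error path can escape with the lock; a minimal sketch (demo_locked_op is hypothetical):

#include <linux/mutex.h>

static int demo_locked_op(struct mutex *lock, bool fault)
{
	int ret = 0;

	mutex_lock(lock);
	if (fault) {
		ret = -EFAULT;
		goto out_unlock;	/* never return with the lock held */
	}
	/* ... operate on the locked state ... */
out_unlock:
	mutex_unlock(lock);
	return ret;
}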


@@ -1464,6 +1464,7 @@ static void query_fast_reg_mode(struct rtrs_clt_path *clt_path)
	mr_page_shift = max(12, ffs(ib_dev->attrs.page_size_cap) - 1);
	max_pages_per_mr = ib_dev->attrs.max_mr_size;
	do_div(max_pages_per_mr, (1ull << mr_page_shift));
+	max_pages_per_mr = min_not_zero((u32)max_pages_per_mr, U32_MAX);
	clt_path->max_pages_per_mr =
		min3(clt_path->max_pages_per_mr, (u32)max_pages_per_mr,
		     ib_dev->attrs.max_fast_reg_page_list_len);
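The added clamp handles truncation: attrs.max_mr_size is a u64, so the page count left in max_pages_per_mr can still exceed 32 bits and the (u32) cast in the min3() below could wrap, in the worst case to 0. For example, max_mr_size = 2^44 with 4K MR pages gives 2^32 pages, which truncates to 0. A standalone sketch of the arithmetic and of what the min_not_zero() line rescues:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t max_mr_size = 1ULL << 44;	/* assumed device limit */
	int mr_page_shift = 12;			/* 4K MR pages */

	uint64_t pages = max_mr_size >> mr_page_shift;	/* 2^32 */
	uint32_t truncated = (uint32_t)pages;		/* wraps to 0 */

	/* effect of min_not_zero(): never feed 0 into the min3() cap */
	uint32_t capped = truncated ? truncated : UINT32_MAX;

	assert(truncated == 0);
	assert(capped == UINT32_MAX);
	return 0;
}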


@@ -150,7 +150,7 @@ enum rtrs_msg_types {
 
 /**
  * enum rtrs_msg_flags - RTRS message flags.
- * @RTRS_NEED_INVAL: Send invalidation in response.
+ * @RTRS_MSG_NEED_INVAL_F: Send invalidation in response.
  * @RTRS_MSG_NEW_RKEY_F: Send refreshed rkey in response.
  */
 enum rtrs_msg_flags {
@@ -179,16 +179,19 @@ struct rtrs_sg_desc {
  * @recon_cnt: Reconnections counter
  * @sess_uuid: UUID of a session (path)
  * @paths_uuid: UUID of a group of sessions (paths)
  *
+ * @first_conn: %1 if the connection request is the first for that session,
+ *		otherwise %0
  * NOTE: max size 56 bytes, see man rdma_connect().
  */
 struct rtrs_msg_conn_req {
-	/* Is set to 0 by cma.c in case of AF_IB, do not touch that.
-	 * see https://www.spinics.net/lists/linux-rdma/msg22397.html
+	/**
+	 * @__cma_version: Is set to 0 by cma.c in case of AF_IB, do not touch
+	 * that. See https://www.spinics.net/lists/linux-rdma/msg22397.html
	 */
	u8 __cma_version;
-	/* On sender side that should be set to 0, or cma_save_ip_info()
-	 * extract garbage and will fail.
+	/**
+	 * @__ip_version: On sender side that should be set to 0, or
+	 * cma_save_ip_info() extract garbage and will fail.
	 */
	u8 __ip_version;
	__le16 magic;
@@ -199,6 +202,7 @@ struct rtrs_msg_conn_req {
	uuid_t sess_uuid;
	uuid_t paths_uuid;
	u8 first_conn : 1;
+	/* private: */
	u8 reserved_bits : 7;
	u8 reserved[11];
 };
@@ -211,6 +215,7 @@ struct rtrs_msg_conn_req {
  * @queue_depth: max inflight messages (queue-depth) in this session
  * @max_io_size: max io size server supports
  * @max_hdr_size: max msg header size server supports
+ * @flags: RTRS message flags for this message
  *
  * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
  */
@@ -222,22 +227,24 @@ struct rtrs_msg_conn_rsp {
	__le32 max_io_size;
	__le32 max_hdr_size;
	__le32 flags;
+	/* private: */
	u8 reserved[36];
 };
 
 /**
- * struct rtrs_msg_info_req
+ * struct rtrs_msg_info_req - client additional info request
  * @type: @RTRS_MSG_INFO_REQ
  * @pathname: Path name chosen by client
  */
 struct rtrs_msg_info_req {
	__le16 type;
	u8 pathname[NAME_MAX];
+	/* private: */
	u8 reserved[15];
 };
 
 /**
- * struct rtrs_msg_info_rsp
+ * struct rtrs_msg_info_rsp - server additional info response
  * @type: @RTRS_MSG_INFO_RSP
  * @sg_cnt: Number of @desc entries
  * @desc: RDMA buffers where the client can write to server
@@ -245,12 +252,14 @@ struct rtrs_msg_info_req {
 struct rtrs_msg_info_rsp {
	__le16 type;
	__le16 sg_cnt;
+	/* private: */
	u8 reserved[4];
+	/* public: */
	struct rtrs_sg_desc desc[];
 };
 
 /**
- * struct rtrs_msg_rkey_rsp
+ * struct rtrs_msg_rkey_rsp - server refreshed rkey response
  * @type: @RTRS_MSG_RKEY_RSP
  * @buf_id: RDMA buf_id of the new rkey
  * @rkey: new remote key for RDMA buffers id from server
@@ -264,6 +273,7 @@ struct rtrs_msg_rkey_rsp {
 /**
  * struct rtrs_msg_rdma_read - RDMA data transfer request from client
  * @type: always @RTRS_MSG_READ
+ * @flags: RTRS message flags (enum rtrs_msg_flags)
  * @usr_len: length of user payload
  * @sg_cnt: number of @desc entries
  * @desc: RDMA buffers where the server can write the result to
@@ -277,7 +287,7 @@ struct rtrs_msg_rdma_read {
 };
 
 /**
- * struct_msg_rdma_write - Message transferred to server with RDMA-Write
+ * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
  * @type: always @RTRS_MSG_WRITE
  * @usr_len: length of user payload
  */
@@ -287,7 +297,7 @@ struct rtrs_msg_rdma_write {
 };
 
 /**
- * struct_msg_rdma_hdr - header for read or write request
+ * struct rtrs_msg_rdma_hdr - header for read or write request
  * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ
  */
 struct rtrs_msg_rdma_hdr {


@@ -24,8 +24,8 @@ struct rtrs_srv_op;
 
 /**
  * enum rtrs_clt_link_ev - Events about connectivity state of a client
- * @RTRS_CLT_LINK_EV_RECONNECTED	Client was reconnected.
- * @RTRS_CLT_LINK_EV_DISCONNECTED	Client was disconnected.
+ * @RTRS_CLT_LINK_EV_RECONNECTED: Client was reconnected.
+ * @RTRS_CLT_LINK_EV_DISCONNECTED: Client was disconnected.
  */
 enum rtrs_clt_link_ev {
	RTRS_CLT_LINK_EV_RECONNECTED,
@@ -33,7 +33,9 @@ enum rtrs_clt_link_ev {
 };
 
 /**
- * Source and destination address of a path to be established
+ * struct rtrs_addr - Source and destination address of a path to be established
+ * @src: source address
+ * @dst: destination address
  */
 struct rtrs_addr {
	struct sockaddr_storage *src;
@@ -41,7 +43,7 @@ struct rtrs_addr {
 };
 
 /**
- * rtrs_clt_ops - it holds the link event callback and private pointer.
+ * struct rtrs_clt_ops - it holds the link event callback and private pointer.
  * @priv: User supplied private data.
  * @link_ev: Event notification callback function for connection state changes
  *	@priv: User supplied data that was passed to rtrs_clt_open()
@@ -67,10 +69,10 @@ enum wait_type {
 };
 
 /**
- * enum rtrs_clt_con_type() type of ib connection to use with a given
+ * enum rtrs_clt_con_type - type of ib connection to use with a given
  * rtrs_permit
- * @ADMIN_CON - use connection reserved for "service" messages
- * @IO_CON - use a connection reserved for IO
+ * @RTRS_ADMIN_CON: use connection reserved for "service" messages
+ * @RTRS_IO_CON: use a connection reserved for IO
  */
 enum rtrs_clt_con_type {
	RTRS_ADMIN_CON,
@@ -85,7 +87,7 @@ void rtrs_clt_put_permit(struct rtrs_clt_sess *sess,
			 struct rtrs_permit *permit);
 
 /**
- * rtrs_clt_req_ops - it holds the request confirmation callback
+ * struct rtrs_clt_req_ops - it holds the request confirmation callback
  * and a private pointer.
  * @priv: User supplied private data.
  * @conf_fn: callback function to be called as confirmation
@@ -105,7 +107,11 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
 int rtrs_clt_rdma_cq_direct(struct rtrs_clt_sess *clt, unsigned int index);
 
 /**
- * rtrs_attrs - RTRS session attributes
+ * struct rtrs_attrs - RTRS session attributes
+ * @queue_depth: queue_depth saved from rtrs_clt_sess message
+ * @max_io_size: max_io_size from rtrs_clt_sess message, capped to
+ *		 @max_segments * %SZ_4K
+ * @max_segments: max_segments saved from rtrs_clt_sess message
  */
 struct rtrs_attrs {
	u32 queue_depth;


@@ -57,8 +57,8 @@ struct irdma_alloc_ucontext_resp {
	__u8 rsvd2;
	__aligned_u64 comp_mask;
	__u16 min_hw_wq_size;
-	__u8 revd3[2];
	__u32 max_hw_srq_quanta;
+	__u8 rsvd3[2];
 };
 
 struct irdma_alloc_pd_resp {


@@ -192,6 +192,7 @@ struct rdma_ucm_query_path_resp {
 
 struct rdma_ucm_query_ib_service_resp {
	__u32 num_service_recs;
+	__u32 reserved;
	struct ib_user_service_rec recs[];
 };
 
@@ -354,7 +355,7 @@ enum {
 #define RDMA_USER_CM_IB_SERVICE_NAME_SIZE 64
 
 struct rdma_ucm_ib_service {
-	__u64 service_id;
+	__aligned_u64 service_id;
	__u8 service_name[RDMA_USER_CM_IB_SERVICE_NAME_SIZE];
	__u32 flags;
	__u32 reserved;
@@ -362,6 +363,7 @@ struct rdma_ucm_ib_service {
 
 struct rdma_ucm_resolve_ib_service {
	__u32 id;
+	__u32 reserved;
	struct rdma_ucm_ib_service ibs;
 };
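Both uAPI hunks enforce the same 32/64-bit layout rule: on i386 a plain __u64 is only 4-byte aligned, so a struct that mixes a lone __u32 with a following 64-bit field gets different offsets in 32-bit and 64-bit processes, while __aligned_u64 plus explicit reserved padding pins one layout everywhere. A standalone illustration (demo structs only, not the real uAPI):

#include <stdint.h>
#include <stdio.h>

/* On i386, service_id lands at offset 4; on x86-64 the compiler
 * inserts 4 hidden padding bytes and it lands at offset 8. */
struct demo_unpadded {
	uint32_t id;
	uint64_t service_id;
};

/* Explicit reserved field plus forced 8-byte alignment: offset 8 and
 * size 16 on every architecture. */
struct demo_padded {
	uint32_t id;
	uint32_t reserved;
	uint64_t service_id __attribute__((aligned(8)));
};

int main(void)
{
	printf("unpadded: size %zu\n", sizeof(struct demo_unpadded));
	printf("padded:   size %zu\n", sizeof(struct demo_padded));
	return 0;
}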